def setUp(self): self.tdir = PersistentTemporaryDirectory('_calibre_dbtest') self.db = LibraryDatabase2(self.tdir) f = open(os.path.join(self.tdir, 'test.txt'), 'w+b') f.write('test') paths = list(repeat(f, 3)) formats = list(repeat('txt', 3)) m1 = MetaInformation('Test Ebook 1', ['Test Author 1']) m1.tags = ['tag1', 'tag2'] m1.publisher = 'Test Publisher 1' m1.rating = 2 m1.series = 'Test Series 1' m1.series_index = 3 m1.author_sort = 'as1' m1.isbn = 'isbn1' m1.cover_data = ('jpg', self.img) m2 = MetaInformation('Test Ebook 2', ['Test Author 2']) m2.tags = ['tag3', 'tag4'] m2.publisher = 'Test Publisher 2' m2.rating = 3 m2.series = 'Test Series 2' m2.series_index = 1 m2.author_sort = 'as1' m2.isbn = 'isbn1' self.db.add_books(paths, formats, [m1, m2, m2], add_duplicates=True) self.m1, self.m2 = m1, m2
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(None, [_('Unknown')]) stream.seek(0) pheader = PdbHeaderReader(stream) # Only Dropbook produced 132 byte record0 files are supported if len(pheader.section_data(0)) == 132: hr = HeaderRecord(pheader.section_data(0)) if hr.compression in (2, 10) and hr.has_metadata == 1: try: mdata = pheader.section_data(hr.metadata_offset) mdata = mdata.split('\x00') mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4]) except: pass if extract_cover: mi.cover_data = get_cover(pheader, hr) if not mi.title: mi.title = pheader.title if pheader.title else _('Unknown') return mi
def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) content = zin.read('meta.xml') parser.parse(StringIO(content)) data = odfs.seenfields mi = MetaInformation(None, []) if 'title' in data: mi.title = data['title'] if data.get('initial-creator', '').strip(): mi.authors = string_to_authors(data['initial-creator']) elif 'creator' in data: mi.authors = string_to_authors(data['creator']) if 'description' in data: mi.comments = data['description'] if 'language' in data: mi.language = data['language'] if data.get('keywords', ''): mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata','') == 'true': # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except ValueError: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', 'false') == 'true' if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) except: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) snbFile = SNBFile() try: if not hasattr(stream, 'write'): snbFile.Parse(StringIO(stream), True) else: stream.seek(0) snbFile.Parse(stream, True) meta = snbFile.GetFileStream('snbf/book.snbf') if meta is not None: meta = etree.fromstring(meta) mi.title = meta.find('.//head/name').text mi.authors = [meta.find('.//head/author').text] mi.language = meta.find('.//head/language').text.lower().replace('_', '-') mi.publisher = meta.find('.//head/publisher').text if extract_cover: cover = meta.find('.//head/cover') if cover is not None and cover.text is not None: root, ext = os.path.splitext(cover.text) if ext == '.jpeg': ext = '.jpg' mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) except Exception: import traceback traceback.print_exc() return mi
def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) content = zin.read('meta.xml') parser.parse(StringIO(content)) data = odfs.seenfields mi = MetaInformation(None, []) if data.has_key('title'): mi.title = data['title'] if data.get('initial-creator', '').strip(): mi.authors = string_to_authors(data['initial-creator']) elif data.has_key('creator'): mi.authors = string_to_authors(data['creator']) if data.has_key('description'): mi.comments = data['description'] if data.has_key('language'): mi.language = data['language'] if data.get('keywords', ''): mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata','') == 'true': # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except ValueError: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', 'false') == 'true' if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) except: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata_from_reader(rdr): raw = rdr.GetFile(rdr.home) home = BeautifulSoup( xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0]) title = rdr.title try: x = rdr.GetEncoding() codecs.lookup(x) enc = x except: enc = 'cp1252' title = force_unicode(title, enc) authors = _get_authors(home) mi = MetaInformation(title, authors) publisher = _get_publisher(home) if publisher: mi.publisher = publisher isbn = _get_isbn(home) if isbn: mi.isbn = isbn comments = _get_comments(home) if comments: mi.comments = comments cdata = _get_cover(home, rdr) if cdata is not None: mi.cover_data = ('jpg', cdata) return mi
def get_metadata_from_reader(rdr): raw = rdr.GetFile(rdr.home) home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0]) title = rdr.title try: x = rdr.GetEncoding() codecs.lookup(x) enc = x except: enc = 'cp1252' title = force_unicode(title, enc) authors = _get_authors(home) mi = MetaInformation(title, authors) publisher = _get_publisher(home) if publisher: mi.publisher = publisher isbn = _get_isbn(home) if isbn: mi.isbn = isbn comments = _get_comments(home) if comments: mi.comments = comments cdata = _get_cover(home, rdr) if cdata is not None: mi.cover_data = ('jpg', cdata) return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(None, [_('Unknown')]) stream.seek(0) pheader = PdbHeaderReader(stream) # Only Dropbook produced 132 byte record0 files are supported if len(pheader.section_data(0)) == 132: hr = HeaderRecord(pheader.section_data(0)) if hr.compression in (2, 10) and hr.has_metadata == 1: try: mdata = pheader.section_data(hr.metadata_offset) mdata = mdata.decode('cp1252', 'replace').split('\x00') mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4]) except Exception: pass if extract_cover: mi.cover_data = get_cover(pheader, hr) if not mi.title: mi.title = pheader.title if pheader.title else _('Unknown') return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) snbFile = SNBFile() try: if not hasattr(stream, 'write'): snbFile.Parse(io.BytesIO(stream), True) else: stream.seek(0) snbFile.Parse(stream, True) meta = snbFile.GetFileStream('snbf/book.snbf') if meta is not None: meta = etree.fromstring(meta) mi.title = meta.find('.//head/name').text mi.authors = [meta.find('.//head/author').text] mi.language = meta.find('.//head/language').text.lower().replace('_', '-') mi.publisher = meta.find('.//head/publisher').text if extract_cover: cover = meta.find('.//head/cover') if cover is not None and cover.text is not None: root, ext = os.path.splitext(cover.text) if ext == '.jpeg': ext = '.jpg' mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) except Exception: import traceback traceback.print_exc() return mi
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip()+', '+lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip() + ', ' + lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(f): read = lambda at, amount: _read(f, at, amount) f.seek(0) buf = f.read(12) if buf[4:] == b'ftypLRX2': offset = 0 while True: offset += word_be(buf[:4]) try: buf = read(offset, 8) except: raise ValueError('Not a valid LRX file') if buf[4:] == b'bbeb': break offset += 8 buf = read(offset, 16) if buf[:8].decode('utf-16-le') != 'LRF\x00': raise ValueError('Not a valid LRX file') lrf_version = word_le(buf[8:12]) offset += 0x4c compressed_size = short_le(read(offset, 2)) offset += 2 if lrf_version >= 800: offset += 6 compressed_size -= 4 uncompressed_size = word_le(read(offset, 4)) info = decompress(f.read(compressed_size)) if len(info) != uncompressed_size: raise ValueError('LRX file has malformed metadata section') root = safe_xml_fromstring(info) bi = root.find('BookInfo') title = bi.find('Title') title_sort = title.get('reading', None) title = title.text author = bi.find('Author') author_sort = author.get('reading', None) mi = MetaInformation(title, string_to_authors(author.text)) mi.title_sort, mi.author_sort = title_sort, author_sort author = author.text publisher = bi.find('Publisher') mi.publisher = getattr(publisher, 'text', None) mi.tags = [x.text for x in bi.findall('Category')] mi.language = root.find('DocInfo').find('Language').text return mi elif buf[4:8] == b'LRX': raise ValueError('Librie LRX format not supported') else: raise ValueError('Not a LRX file')
def get_metadata(f): read = lambda at, amount: _read(f, at, amount) f.seek(0) buf = f.read(12) if buf[4:] == 'ftypLRX2': offset = 0 while True: offset += word_be(buf[:4]) try: buf = read(offset, 8) except: raise ValueError('Not a valid LRX file') if buf[4:] == 'bbeb': break offset += 8 buf = read(offset, 16) if buf[:8].decode('utf-16-le') != 'LRF\x00': raise ValueError('Not a valid LRX file') lrf_version = word_le(buf[8:12]) offset += 0x4c compressed_size = short_le(read(offset, 2)) offset += 2 if lrf_version >= 800: offset += 6 compressed_size -= 4 uncompressed_size = word_le(read(offset, 4)) info = decompress(f.read(compressed_size)) if len(info) != uncompressed_size: raise ValueError('LRX file has malformed metadata section') root = etree.fromstring(info) bi = root.find('BookInfo') title = bi.find('Title') title_sort = title.get('reading', None) title = title.text author = bi.find('Author') author_sort = author.get('reading', None) mi = MetaInformation(title, string_to_authors(author.text)) mi.title_sort, mi.author_sort = title_sort, author_sort author = author.text publisher = bi.find('Publisher') mi.publisher = getattr(publisher, 'text', None) mi.tags = [x.text for x in bi.findall('Category')] mi.language = root.find('DocInfo').find('Language').text return mi elif buf[4:8] == 'LRX': raise ValueError('Librie LRX format not supported') else: raise ValueError('Not a LRX file')
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ stream.seek(0) if stream.read(5) != r'{\rtf': return MetaInformation(_('Unknown')) block = get_document_info(stream)[0] if not block: return MetaInformation(_('Unknown')) stream.seek(0) cpg = detect_codepage(stream) stream.seek(0) title_match = title_pat.search(block) if title_match is not None: title = decode(title_match.group(1).strip(), cpg) else: title = _('Unknown') author_match = author_pat.search(block) if author_match is not None: author = decode(author_match.group(1).strip(), cpg) else: author = None mi = MetaInformation(title) if author: mi.authors = string_to_authors(author) comment_match = comment_pat.search(block) if comment_match is not None: comment = decode(comment_match.group(1).strip(), cpg) mi.comments = comment tags_match = tags_pat.search(block) if tags_match is not None: tags = decode(tags_match.group(1).strip(), cpg) mi.tags = list(filter(None, (x.strip() for x in tags.split(',')))) publisher_match = publisher_pat.search(block) if publisher_match is not None: publisher = decode(publisher_match.group(1).strip(), cpg) mi.publisher = publisher return mi
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ stream.seek(0) if stream.read(5) != br'{\rtf': return MetaInformation(_('Unknown')) block = get_document_info(stream)[0] if not block: return MetaInformation(_('Unknown')) stream.seek(0) cpg = detect_codepage(stream) stream.seek(0) title_match = title_pat.search(block) if title_match is not None: title = decode(title_match.group(1).strip(), cpg) else: title = _('Unknown') author_match = author_pat.search(block) if author_match is not None: author = decode(author_match.group(1).strip(), cpg) else: author = None mi = MetaInformation(title) if author: mi.authors = [x.strip() for x in author.split(',')] comment_match = comment_pat.search(block) if comment_match is not None: comment = decode(comment_match.group(1).strip(), cpg) mi.comments = comment tags_match = tags_pat.search(block) if tags_match is not None: tags = decode(tags_match.group(1).strip(), cpg) mi.tags = list(filter(None, (x.strip() for x in tags.split(',')))) publisher_match = publisher_pat.search(block) if publisher_match is not None: publisher = decode(publisher_match.group(1).strip(), cpg) mi.publisher = publisher return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) pml = '' if stream.name.endswith('.pmlz'): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) pmls = glob.glob(os.path.join(tdir, '*.pml')) for p in pmls: with open(p, 'r+b') as p_stream: pml += p_stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True) else: pml = stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) for comment in re.findall(r'(?mus)\\v.*?\\v', pml): m = re.search(r'TITLE="(.*?)"', comment) if m: mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'AUTHOR="(.*?)"', comment) if m: if mi.authors == [_('Unknown')]: mi.authors = [] mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) m = re.search(r'PUBLISHER="(.*?)"', comment) if m: mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'COPYRIGHT="(.*?)"', comment) if m: mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'ISBN="(.*?)"', comment) if m: mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) pml = b'' if stream.name.endswith('.pmlz'): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) pmls = glob.glob(os.path.join(tdir, '*.pml')) for p in pmls: with open(p, 'r+b') as p_stream: pml += p_stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True) else: pml = stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) for comment in re.findall(br'(?ms)\\v.*?\\v', pml): m = re.search(br'TITLE="(.*?)"', comment) if m: mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'AUTHOR="(.*?)"', comment) if m: if mi.authors == [_('Unknown')]: mi.authors = [] mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) m = re.search(br'PUBLISHER="(.*?)"', comment) if m: mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'COPYRIGHT="(.*?)"', comment) if m: mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'ISBN="(.*?)"', comment) if m: mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) return mi
def get_metadata(self): title, authors = self.get_title_and_authors() mi = MetaInformation(title, authors) publisher = str(self.publisher.text()).strip() if publisher: mi.publisher = publisher author_sort = str(self.author_sort.text()).strip() if author_sort: mi.author_sort = author_sort comments = self.comment.html if comments: mi.comments = comments mi.series_index = float(self.series_index.value()) series = str(self.series.currentText()).strip() if series: mi.series = series tags = [t.strip() for t in str(self.tags.text()).strip().split(',')] if tags: mi.tags = tags return mi
def get_metadata(self): title, authors = self.get_title_and_authors() mi = MetaInformation(title, authors) publisher = unicode(self.publisher.text()).strip() if publisher: mi.publisher = publisher author_sort = unicode(self.author_sort.text()).strip() if author_sort: mi.author_sort = author_sort comments = self.comment.html if comments: mi.comments = comments mi.series_index = float(self.series_index.value()) series = unicode(self.series.currentText()).strip() if series: mi.series = series tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')] if tags: mi.tags = tags return mi
def populate(self, entries, browser, verbose=False, api_key=''): for x in entries: try: id_url = entry_id(x)[0].text title = self.get_title(x) except: report(verbose) mi = MetaInformation(title, self.get_authors(x)) try: if api_key != '': id_url = id_url + "?apikey=" + api_key raw = browser.open(id_url).read() feed = etree.fromstring(raw) x = entry(feed)[0] except Exception, e: if verbose: print 'Failed to get all details for an entry' print e mi.comments = self.get_description(x, verbose) mi.tags = self.get_tags(x, verbose) mi.isbn = self.get_isbn(x, verbose) mi.publisher = self.get_publisher(x, verbose) mi.pubdate = self.get_date(x, verbose) self.append(mi)
def get_metadata(stream, extract_cover=True): whitespace = re.compile(r'\s+') def normalize(s): return whitespace.sub(' ', s).strip() with ZipFile(stream) as zf: meta = zf.read('meta.xml') root = fromstring(meta) def find(field): ns, tag = fields[field] ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns}) if ans: return normalize( tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip() mi = MetaInformation(None, []) title = find('title') if title: mi.title = title creator = find('initial-creator') or find('creator') if creator: mi.authors = string_to_authors(creator) desc = find('description') if desc: mi.comments = desc lang = find('language') if lang and canonicalize_lang(lang): mi.languages = [canonicalize_lang(lang)] kw = find('keyword') or find('keywords') if kw: mi.tags = [x.strip() for x in kw.split(',') if x.strip()] data = {} for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}): name = (tag.get('{%s}name' % METANS) or '').lower() vtype = tag.get('{%s}value-type' % METANS) or 'string' val = tag.text if name and val: if vtype == 'boolean': val = val == 'true' data[name] = val opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata'): # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.identifiers'): try: mi.identifiers = json.loads(data['opf.identifiers']) except Exception: pass if data.get('opf.rating'): try: mi.rating = max(0, min(float(data['opf.rating']), 10)) except Exception: pass if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except Exception: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', False) if not opfnocover: try: read_cover(stream, zf, mi, opfmeta, extract_cover) except Exception: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata_(src, encoding=None): if not isinstance(src, unicode): if not encoding: src = xml_to_unicode(src)[0] else: src = src.decode(encoding, "replace") # Meta data definitions as in # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9 # Title title = None pat = re.compile(r'<!--.*?TITLE=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) src = src[:150000] # Searching shouldn't take too long match = pat.search(src) if match: title = match.group(2) else: for x in ("DC.title", "DCTERMS.title", "Title"): pat = get_meta_regexp_(x) match = pat.search(src) if match: title = match.group(1) break if not title: pat = re.compile("<title>([^<>]+?)</title>", re.IGNORECASE) match = pat.search(src) if match: title = match.group(1) # Author author = None pat = re.compile(r'<!--.*?AUTHOR=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) match = pat.search(src) if match: author = match.group(2).replace(",", ";") else: for x in ("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"): pat = get_meta_regexp_(x) match = pat.search(src) if match: author = match.group(1) break # Create MetaInformation with Title and Author ent_pat = re.compile(r"&(\S+)?;") if title: title = ent_pat.sub(entity_to_unicode, title) if author: author = ent_pat.sub(entity_to_unicode, author) mi = MetaInformation(title, [author] if author else None) # Publisher publisher = None pat = re.compile(r'<!--.*?PUBLISHER=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) match = pat.search(src) if match: publisher = match.group(2) else: for x in ("Publisher", "DC.publisher", "DCTERMS.publisher"): pat = get_meta_regexp_(x) match = pat.search(src) if match: publisher = match.group(1) break if publisher: mi.publisher = ent_pat.sub(entity_to_unicode, publisher) # ISBN isbn = None pat = re.compile(r'<!--.*?ISBN=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: isbn = match.group(1) else: for x in ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"): pat = get_meta_regexp_(x) match = pat.search(src) if match: isbn = match.group(1) break if isbn: mi.isbn = re.sub(r"[^0-9xX]", "", isbn) # LANGUAGE language = None pat = re.compile(r'<!--.*?LANGUAGE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: language = match.group(1) else: for x in ("DC.language", "DCTERMS.language"): pat = get_meta_regexp_(x) match = pat.search(src) if match: language = match.group(1) break if language: mi.language = language # PUBDATE pubdate = None pat = re.compile(r'<!--.*?PUBDATE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: pubdate = match.group(1) else: for x in ( "Pubdate", "Date of publication", "DC.date.published", "DC.date.publication", "DC.date.issued", "DCTERMS.issued", ): pat = get_meta_regexp_(x) match = pat.search(src) if match: pubdate = match.group(1) break if pubdate: try: mi.pubdate = parse_date(pubdate) except: pass # TIMESTAMP timestamp = None pat = re.compile(r'<!--.*?TIMESTAMP=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: timestamp = match.group(1) else: for x in ("Timestamp", "Date of creation", "DC.date.created", "DC.date.creation", "DCTERMS.created"): pat = get_meta_regexp_(x) match = pat.search(src) if match: timestamp = match.group(1) break if timestamp: try: mi.timestamp = parse_date(timestamp) except: pass # SERIES series = None pat = re.compile(r'<!--.*?SERIES=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: series = match.group(1) else: pat = get_meta_regexp_("Series") match = pat.search(src) if match: series = match.group(1) if series: pat = re.compile(r"\[([.0-9]+)\]") match = pat.search(series) series_index = None if match is not None: try: series_index = float(match.group(1)) except: pass series = series.replace(match.group(), "").strip() mi.series = ent_pat.sub(entity_to_unicode, series) if series_index is None: pat = get_meta_regexp_("Seriesnumber") match = pat.search(src) if match: try: series_index = float(match.group(1)) except: pass if series_index is not None: mi.series_index = series_index # RATING rating = None pat = re.compile(r'<!--.*?RATING=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: rating = match.group(1) else: pat = get_meta_regexp_("Rating") match = pat.search(src) if match: rating = match.group(1) if rating: try: mi.rating = float(rating) if mi.rating < 0: mi.rating = 0 if mi.rating > 5: mi.rating /= 2.0 if mi.rating > 5: mi.rating = 0 except: pass # COMMENTS comments = None pat = re.compile(r'<!--.*?COMMENTS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: comments = match.group(1) else: pat = get_meta_regexp_("Comments") match = pat.search(src) if match: comments = match.group(1) if comments: mi.comments = ent_pat.sub(entity_to_unicode, comments) # TAGS tags = None pat = re.compile(r'<!--.*?TAGS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: tags = match.group(1) else: pat = get_meta_regexp_("Tags") match = pat.search(src) if match: tags = match.group(1) if tags: mi.tags = [x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")] # Ready to return MetaInformation return mi
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None): linenums, changedtocs, checkedalways = newspecs # logger.debug("updated tocs:%s"%changedtocs) if not self.has_lines(linenums): return #logger.debug("2:%s"%(time.time()-self.t)) self.t = time.time() #logger.debug("linenums:%s"%linenums) defauthors = None if not deftitle and prefs['copytoctitle']: if linenums[0] in changedtocs: deftitle = changedtocs[linenums[0]][0] # already unicoded()'ed elif len(origlines[linenums[0]]['toc']) > 0: deftitle = unicode(origlines[linenums[0]]['toc'][0]) #logger.debug("deftitle:%s"%deftitle) if not deftitle and prefs['copytitle']: deftitle = _("%s Split") % misource.title if prefs['copyauthors']: defauthors = misource.authors mi = MetaInformation(deftitle, defauthors) if prefs['copytags']: mi.tags = misource.tags # [item for sublist in tagslists for item in sublist] if prefs['copylanguages']: mi.languages = misource.languages if prefs['copyseries']: mi.series = misource.series if prefs['copydate']: mi.timestamp = misource.timestamp if prefs['copyrating']: mi.rating = misource.rating if prefs['copypubdate']: mi.pubdate = misource.pubdate if prefs['copypublisher']: mi.publisher = misource.publisher if prefs['copyidentifiers']: mi.set_identifiers(misource.get_identifiers()) if prefs['copycomments'] and misource.comments: mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments #logger.debug("mi:%s"%mi) book_id = db.create_book_entry(mi, add_duplicates=True) if prefs['copycover'] and misource.has_cover: db.set_cover(book_id, db.cover(source_id, index_is_id=True)) #logger.debug("3:%s"%(time.time()-self.t)) self.t = time.time() custom_columns = self.gui.library_view.model().custom_columns for col, action in six.iteritems(prefs['custom_cols']): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: #logger.debug("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) label = coldef['label'] value = db.get_custom(source_id, label=label, index_is_id=True) if value: db.set_custom(book_id, value, label=label, commit=False) #logger.debug("3.5:%s"%(time.time()-self.t)) self.t = time.time() if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \ and prefs['sourcetemplate']: val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource) #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val)) label = custom_columns[prefs['sourcecol']]['label'] if custom_columns[prefs['sourcecol']]['datatype'] == 'series': val = val + (" [%s]" % self.book_count) db.set_custom(book_id, val, label=label, commit=False) self.book_count = self.book_count + 1 db.commit() #logger.debug("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) #logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() editconfig_txt = _( 'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.' ) if prefs['editmetadata']: confirm( '\n' + _('''The book for the new Split EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. You can fill in the metadata yourself, or use download metadata for known books. If you download or add a cover image, it will be included in the generated EPUB.''' ) + '\n\n' + editconfig_txt + '\n', 'epubsplit_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) try: QApplication.setOverrideCursor(QCursor(Qt.WaitCursor)) #logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000) mi = db.get_metadata(book_id, index_is_id=True) outputepub = PersistentTemporaryFile(suffix='.epub') coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') outlist = list(set(linenums + checkedalways)) outlist.sort() splitepub.write_split_epub(outputepub, outlist, changedtocs=changedtocs, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, coverjpgpath=coverjpgpath) #logger.debug("6:%s"%(time.time()-self.t)) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True) #logger.debug("7:%s"%(time.time()-self.t)) self.t = time.time() self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed( current, self.previous) if self.gui.cover_flow: self.gui.cover_flow.dataChanged() finally: QApplication.restoreOverrideCursor() if not prefs['editmetadata']: confirm( '<p>' + '</p><p>'.join([ _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.' ) % (mi.title, ', '.join(mi.authors)), _('EpubSplit now skips the Edit Metadata step by default.' ), editconfig_txt ]) + '</p>', 'epubsplit_created_now_no_edit_again', self.gui)
def _start_merge(self, book_list): db = self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = filter(lambda x: not x['good'], book_list) if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.') % len(bad_list), det_msg='\n'.join( map(lambda x: x['error'], bad_list))) d.exec_() else: d = OrderEPUBsDialog( self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() logger.debug("2:%s" % (time.time() - self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword']) mi = MetaInformation(deftitle, ["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = map(lambda x: x['series'], filter(lambda x: x['series'] != None, book_list)) if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle break # logger.debug("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = map(lambda x: x['authors'], book_list) for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list)) # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set(map(lambda x: x['publisher'], book_list)) if len(publishers) == 1: mi.publisher = publishers.pop() # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = map(lambda x: x['tags'], book_list) mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = map(lambda x: x['languages'], book_list) mi.languages = [ item for sublist in languageslists for item in sublist ] mi.series = '' # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join( x['authors'])) else: booktitle = lambda x: x['title'] mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<b>%s</b>\n\n%s' % (booktitle(x), x['comments']) else: return '<b>%s</b>\n' % booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join( [bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '\n'.join([booktitle(x) for x in book_list]) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== logger.debug("3:%s" % (time.time() - self.t)) self.t = time.time() # have to get custom from db for each book. idslist = map(lambda x: x['calibre_id'], book_list) custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: logger.debug("%s not an existing column, skipping." % col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: logger.debug( "%s not a valid column type for %s, skipping." % (col, action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first', 'last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]" % ( value, db.get_custom_extra( idslist[idx], label=label, index_is_id=True)) found = True if action in ('add', 'average', 'averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average', 'averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union( db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if found and value != None: db.set_custom(book_id, value, label=label, commit=False) db.commit() logger.debug("4:%s" % (time.time() - self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) logger.debug("5:%s" % (time.time() - self.t)) self.t = time.time() confirm( '\n' + _('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.''' ), 'epubmerge_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) logger.debug("5:%s" % (time.time() - self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum(map(lambda x: x['epub_size'], book_list)) logger.debug("merging %s EPUBs totaling %s" % (len(book_list), gethumanreadable(totalsize))) if len(book_list) > 100 or totalsize > 5 * 1024 * 1024: confirm( '\n' + _('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''' ) % (len(book_list), gethumanreadable(totalsize)), 'epubmerge_edited_now_merge_again', self.gui) self.gui.status_bar.show_message( _('Merging %s EPUBs...') % len(book_list), 60000) mi = db.get_metadata(book_id, index_is_id=True) mergedepub = PersistentTemporaryFile(suffix='.epub') epubstomerge = map(lambda x: x['epub'], book_list) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') self.do_merge(mergedepub, epubstomerge, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, titlenavpoints=prefs['titlenavpoints'], flattentoc=prefs['flattentoc'], printtimes=True, coverjpgpath=coverjpgpath, keepmetadatafiles=prefs['keepmeta']) logger.debug("6:%s" % (time.time() - self.t)) logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True) logger.debug("7:%s" % (time.time() - self.t)) self.t = time.time() self.gui.status_bar.show_message( _('Finished merging %s EPUBs.') % len(book_list), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed( current, self.previous)
def _do_split(self, db, source_id, misource, splitepub, newspecs, deftitle=None, editmeta=True): linenums, changedtocs = newspecs # logger.debug("updated tocs:%s"%changedtocs) # logger.debug("2:%s"%(time.time()-self.t)) self.t = time.time() # logger.debug("linenums:%s"%linenums) defauthors = None if not deftitle and prefs['copytitle']: deftitle = _("نمونه %s") % misource.title if prefs['copyauthors']: defauthors = misource.authors mi = MetaInformation(deftitle, defauthors) if prefs['copytags']: mi.tags = misource.tags # [item for sublist in tagslists for item in sublist] if prefs['copylanguages']: mi.languages = misource.languages if prefs['copyseries']: mi.series = misource.series if prefs['copydate']: mi.timestamp = misource.timestamp if prefs['copyrating']: mi.rating = misource.rating if prefs['copypubdate']: mi.pubdate = misource.pubdate if prefs['copypublisher']: mi.publisher = misource.publisher if prefs['copyidentifiers']: mi.set_identifiers(misource.get_identifiers()) if prefs['copycomments'] and misource.comments: mi.comments = _("Split from:") + "\n\n" + misource.comments # logger.debug("mi:%s"%mi) book_id = db.create_book_entry(mi, add_duplicates=True) if prefs['copycover'] and misource.has_cover: db.set_cover(book_id, db.cover(source_id, index_is_id=True)) # logger.debug("3:%s"%(time.time()-self.t)) self.t = time.time() custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): # logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: # logger.debug("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] # logger.debug("coldef:%s"%coldef) label = coldef['label'] value = db.get_custom(source_id, label=label, index_is_id=True) if value: db.set_custom(book_id, value, label=label, commit=False) # logger.debug("3.5:%s"%(time.time()-self.t)) self.t = time.time() if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \ and prefs['sourcetemplate']: val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource) # logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val)) label = custom_columns[prefs['sourcecol']]['label'] db.set_custom(book_id, val, label=label, commit=False) db.commit() # logger.debug("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) # logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() # if editmeta: # confirm('\n'+_('کتاب نمونه ساخته شود؟')+'\n', # 'epubsplit_created_now_edit_again', # self.gui) # # self.gui.iactions['Edit Metadata'].edit_metadata(False) # logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000) mi = db.get_metadata(book_id, index_is_id=True) outputepub = PersistentTemporaryFile(suffix='.epub') coverjpgpath = None # if mi.has_cover: # # grab the path to the real image. # coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') splitepub.write_split_epub(outputepub, linenums, changedtocs=changedtocs, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, coverjpgpath=coverjpgpath) # logger.debug("6:%s"%(time.time()-self.t)) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True) # logger.debug("7:%s"%(time.time()-self.t)) self.t = time.time() self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed(current, self.previous)
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None): linenums, changedtocs, checkedalways = newspecs # logger.debug("updated tocs:%s"%changedtocs) if not self.has_lines(linenums): return #logger.debug("2:%s"%(time.time()-self.t)) self.t = time.time() #logger.debug("linenums:%s"%linenums) defauthors = None if not deftitle and prefs['copytoctitle']: if linenums[0] in changedtocs: deftitle=changedtocs[linenums[0]][0] # already unicoded()'ed elif len(origlines[linenums[0]]['toc']) > 0: deftitle=unicode(origlines[linenums[0]]['toc'][0]) #logger.debug("deftitle:%s"%deftitle) if not deftitle and prefs['copytitle']: deftitle = _("%s Split") % misource.title if prefs['copyauthors']: defauthors = misource.authors mi = MetaInformation(deftitle,defauthors) if prefs['copytags']: mi.tags = misource.tags # [item for sublist in tagslists for item in sublist] if prefs['copylanguages']: mi.languages = misource.languages if prefs['copyseries']: mi.series = misource.series if prefs['copydate']: mi.timestamp = misource.timestamp if prefs['copyrating']: mi.rating = misource.rating if prefs['copypubdate']: mi.pubdate = misource.pubdate if prefs['copypublisher']: mi.publisher = misource.publisher if prefs['copyidentifiers']: mi.set_identifiers(misource.get_identifiers()) if prefs['copycomments'] and misource.comments: mi.comments = "<p>"+_("Split from:")+"</p>" + misource.comments #logger.debug("mi:%s"%mi) book_id = db.create_book_entry(mi, add_duplicates=True) if prefs['copycover'] and misource.has_cover: db.set_cover(book_id, db.cover(source_id,index_is_id=True)) #logger.debug("3:%s"%(time.time()-self.t)) self.t = time.time() custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: #logger.debug("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) label = coldef['label'] value = db.get_custom(source_id, label=label, index_is_id=True) if value: db.set_custom(book_id,value,label=label,commit=False) #logger.debug("3.5:%s"%(time.time()-self.t)) self.t = time.time() if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \ and prefs['sourcetemplate']: val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource) #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val)) label = custom_columns[prefs['sourcecol']]['label'] if custom_columns[prefs['sourcecol']]['datatype'] == 'series': val = val + (" [%s]"%self.book_count) db.set_custom(book_id, val, label=label, commit=False) self.book_count = self.book_count+1 db.commit() #logger.debug("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) #logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.') if prefs['editmetadata']: confirm('\n'+_('''The book for the new Split EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. You can fill in the metadata yourself, or use download metadata for known books. If you download or add a cover image, it will be included in the generated EPUB.''')+'\n\n'+ editconfig_txt+'\n', 'epubsplit_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) try: QApplication.setOverrideCursor(QCursor(Qt.WaitCursor)) #logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000) mi = db.get_metadata(book_id,index_is_id=True) outputepub = PersistentTemporaryFile(suffix='.epub') coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') outlist = list(set(linenums + checkedalways)) outlist.sort() splitepub.write_split_epub(outputepub, outlist, changedtocs=changedtocs, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, coverjpgpath=coverjpgpath) #logger.debug("6:%s"%(time.time()-self.t)) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True) #logger.debug("7:%s"%(time.time()-self.t)) self.t = time.time() self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed(current, self.previous) finally: QApplication.restoreOverrideCursor() if not prefs['editmetadata']: confirm('<p>'+ '</p><p>'.join([_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')%(mi.title,', '.join(mi.authors)), _('EpubSplit now skips the Edit Metadata step by default.'), editconfig_txt])+ '</p>', 'epubsplit_created_now_no_edit_again', self.gui)
def get_metadata_(src, encoding=None): if not isinstance(src, unicode): if not encoding: src = xml_to_unicode(src)[0] else: src = src.decode(encoding, 'replace') # Meta data definitions as in # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9 # Title title = None pat = re.compile(r'<!--.*?TITLE=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) src = src[:150000] # Searching shouldn't take too long match = pat.search(src) if match: title = match.group(2) else: for x in ('DC.title', 'DCTERMS.title', 'Title'): pat = get_meta_regexp_(x) match = pat.search(src) if match: title = match.group(1) break if not title: pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE) match = pat.search(src) if match: title = match.group(1) # Author author = None pat = re.compile(r'<!--.*?AUTHOR=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) match = pat.search(src) if match: author = match.group(2).replace(',', ';') else: for x in ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut', 'DC.creator'): pat = get_meta_regexp_(x) match = pat.search(src) if match: author = match.group(1) break # Create MetaInformation with Title and Author ent_pat = re.compile(r'&(\S+)?;') if title: title = ent_pat.sub(entity_to_unicode, title) if author: author = ent_pat.sub(entity_to_unicode, author) mi = MetaInformation(title, [author] if author else None) # Publisher publisher = None pat = re.compile(r'<!--.*?PUBLISHER=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL) match = pat.search(src) if match: publisher = match.group(2) else: for x in ('Publisher', 'DC.publisher', 'DCTERMS.publisher'): pat = get_meta_regexp_(x) match = pat.search(src) if match: publisher = match.group(1) break if publisher: mi.publisher = ent_pat.sub(entity_to_unicode, publisher) # ISBN isbn = None pat = re.compile(r'<!--.*?ISBN=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: isbn = match.group(1) else: for x in ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'): pat = get_meta_regexp_(x) match = pat.search(src) if match: isbn = match.group(1) break if isbn: mi.isbn = re.sub(r'[^0-9xX]', '', isbn) # LANGUAGE language = None pat = re.compile(r'<!--.*?LANGUAGE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: language = match.group(1) else: for x in ('DC.language', 'DCTERMS.language'): pat = get_meta_regexp_(x) match = pat.search(src) if match: language = match.group(1) break if language: mi.language = language # PUBDATE pubdate = None pat = re.compile(r'<!--.*?PUBDATE=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: pubdate = match.group(1) else: for x in ('Pubdate', 'Date of publication', 'DC.date.published', 'DC.date.publication', 'DC.date.issued', 'DCTERMS.issued'): pat = get_meta_regexp_(x) match = pat.search(src) if match: pubdate = match.group(1) break if pubdate: try: mi.pubdate = parse_date(pubdate) except: pass # TIMESTAMP timestamp = None pat = re.compile(r'<!--.*?TIMESTAMP=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: timestamp = match.group(1) else: for x in ('Timestamp', 'Date of creation', 'DC.date.created', 'DC.date.creation', 'DCTERMS.created'): pat = get_meta_regexp_(x) match = pat.search(src) if match: timestamp = match.group(1) break if timestamp: try: mi.timestamp = parse_date(timestamp) except: pass # SERIES series = None pat = re.compile(r'<!--.*?SERIES=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: series = match.group(1) else: pat = get_meta_regexp_("Series") match = pat.search(src) if match: series = match.group(1) if series: pat = re.compile(r'\[([.0-9]+)\]') match = pat.search(series) series_index = None if match is not None: try: series_index = float(match.group(1)) except: pass series = series.replace(match.group(), '').strip() mi.series = ent_pat.sub(entity_to_unicode, series) if series_index is None: pat = get_meta_regexp_("Seriesnumber") match = pat.search(src) if match: try: series_index = float(match.group(1)) except: pass if series_index is not None: mi.series_index = series_index # RATING rating = None pat = re.compile(r'<!--.*?RATING=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: rating = match.group(1) else: pat = get_meta_regexp_("Rating") match = pat.search(src) if match: rating = match.group(1) if rating: try: mi.rating = float(rating) if mi.rating < 0: mi.rating = 0 if mi.rating > 5: mi.rating /= 2. if mi.rating > 5: mi.rating = 0 except: pass # COMMENTS comments = None pat = re.compile(r'<!--.*?COMMENTS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: comments = match.group(1) else: pat = get_meta_regexp_("Comments") match = pat.search(src) if match: comments = match.group(1) if comments: mi.comments = ent_pat.sub(entity_to_unicode, comments) # TAGS tags = None pat = re.compile(r'<!--.*?TAGS=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL) match = pat.search(src) if match: tags = match.group(1) else: pat = get_meta_regexp_("Tags") match = pat.search(src) if match: tags = match.group(1) if tags: mi.tags = [ x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",") ] # Ready to return MetaInformation return mi
def metadata_from_filename(name, pat=None, fallback_pat=None): if isbytestring(name): name = name.decode(filesystem_encoding, 'replace') name = name.rpartition('.')[0] mi = MetaInformation(None, None) if pat is None: pat = re.compile(prefs.get('filename_pattern')) name = name.replace('_', ' ') match = pat.search(name) if match is None and fallback_pat is not None: match = fallback_pat.search(name) if match is not None: try: mi.title = match.group('title') except IndexError: pass try: au = match.group('author') aus = string_to_authors(au) if aus: mi.authors = aus if prefs['swap_author_names'] and mi.authors: def swap(a): if ',' in a: parts = a.split(',', 1) else: parts = a.split(None, 1) if len(parts) > 1: t = parts[-1] parts = parts[:-1] parts.insert(0, t) return ' '.join(parts) mi.authors = [swap(x) for x in mi.authors] except (IndexError, ValueError): pass try: mi.series = match.group('series') except IndexError: pass try: si = match.group('series_index') mi.series_index = float(si) except (IndexError, ValueError, TypeError): pass try: si = match.group('isbn') mi.isbn = si except (IndexError, ValueError): pass try: publisher = match.group('publisher') mi.publisher = publisher except (IndexError, ValueError): pass try: pubdate = match.group('published') if pubdate: from calibre.utils.date import parse_only_date mi.pubdate = parse_only_date(pubdate) except: pass try: comments = match.group('comments') mi.comments = comments except (IndexError, ValueError): pass if mi.is_null('title'): mi.title = name return mi
def _start_merge(self,book_list,tdir=None): db=self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = [ x for x in book_list if not x['good'] ] if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.')%len(bad_list), det_msg='\n'.join( [ x['error'] for x in bad_list ])) d.exec_() else: d = OrderEPUBsDialog(self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() logger.debug("2:%s"%(time.time()-self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword']) mi = MetaInformation(deftitle,["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = [ x['series'] for x in book_list if x['series'] != None ] if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle; break # logger.debug("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = [ x['authors'] for x in book_list ] for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ]) # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set([ x['publisher'] for x in book_list ]) if len(publishers) == 1: mi.publisher = publishers.pop() # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = [ x['tags'] for x in book_list ] mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = [ x['languages'] for x in book_list ] mi.languages = [item for sublist in languageslists for item in sublist] mi.series = '' if prefs['firstseries'] and book_list[0]['series']: mi.series = book_list[0]['series'] mi.series_index = book_list[0]['series_index'] # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors'])) else: booktitle = lambda x : x['title'] mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments']) else: return '<b>%s</b><br/>'%booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] ) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== logger.debug("3:%s"%(time.time()-self.t)) self.t = time.time() # have to get custom from db for each book. idslist = [ x['calibre_id'] for x in book_list ] custom_columns = self.gui.library_view.model().custom_columns for col, action in six.iteritems(prefs['custom_cols']): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: logger.debug("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: logger.debug("%s not a valid column type for %s, skipping."%(col,action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first','last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True)) found = True if action in ('add','average','averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average','averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union(db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if action == 'now': value = datetime.now() found = True logger.debug("now: %s"%value) if found and value != None: logger.debug("value: %s"%value) db.set_custom(book_id,value,label=label,commit=False) db.commit() logger.debug("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''), 'epubmerge_created_now_edit_again', self.gui, title=_("EpubMerge"), show_cancel_button=False) self.gui.iactions['Edit Metadata'].edit_metadata(False) logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum([ x['epub_size'] for x in book_list ]) logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize))) confirm('\n'+_('''EpubMerge will be done in a Background job. The merged EPUB will not appear in the Library until finished. You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)), 'epubmerge_background_merge_again', self.gui, title=_("EpubMerge"), show_cancel_button=False) # if len(book_list) > 100 or totalsize > 5*1024*1024: # confirm('\n'+_('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)), # 'epubmerge_edited_now_merge_again', # self.gui) self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000) mi = db.get_metadata(book_id,index_is_id=True) mergedepub = PersistentTemporaryFile(prefix="output_", suffix='.epub', dir=tdir) epubstomerge = [ x['epub'] for x in book_list ] epubtitles = {} for x in book_list: # save titles indexed by epub for reporting from BG epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors'])) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') func = 'arbitrary_n' cpus = self.gui.job_manager.server.pool_size args = ['calibre_plugins.epubmerge.jobs', 'do_merge_bg', ({'book_id':book_id, 'book_count':len(book_list), 'tdir':tdir, 'outputepubfn':mergedepub.name, 'inputepubfns':epubstomerge, # already .name'ed 'epubtitles':epubtitles, # for reporting 'authoropts':mi.authors, 'titleopt':mi.title, 'descopt':mi.comments, 'tags':mi.tags, 'languages':mi.languages, 'titlenavpoints':prefs['titlenavpoints'], 'originalnavpoints':prefs['originalnavpoints'], 'flattentoc':prefs['flattentoc'], 'printtimes':True, 'coverjpgpath':coverjpgpath, 'keepmetadatafiles':prefs['keepmeta'] }, cpus)] desc = _('EpubMerge: %s')%mi.title job = self.gui.job_manager.run_job( self.Dispatcher(self.merge_done), func, args=args, description=desc) self.gui.jobs_pointer.start() self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)
def _start_merge(self,book_list): db=self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = filter(lambda x : not x['good'], book_list) if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.')%len(bad_list), det_msg='\n'.join(map(lambda x : x['error'] , bad_list))) d.exec_() else: d = OrderEPUBsDialog(self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() print("2:%s"%(time.time()-self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword']) mi = MetaInformation(deftitle,["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list)) if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle; break # print("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = map(lambda x : x['authors'], book_list) for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # print("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list)) # print("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set(map(lambda x : x['publisher'], book_list)) if len(publishers) == 1: mi.publisher = publishers.pop() # print("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = map(lambda x : x['tags'], book_list) mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # print("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # print("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = map(lambda x : x['languages'], book_list) mi.languages = [item for sublist in languageslists for item in sublist] mi.series = '' # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors'])) else: booktitle = lambda x : x['title'] mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments']) else: return '<b>%s</b>\n'%booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] ) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== print("3:%s"%(time.time()-self.t)) self.t = time.time() # have to get custom from db for each book. idslist = map(lambda x : x['calibre_id'], book_list) custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): #print("col: %s action: %s"%(col,action)) if col not in custom_columns: print("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #print("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: print("%s not a valid column type for %s, skipping."%(col,action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first','last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True)) found = True if action in ('add','average','averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average','averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union(db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if found and value != None: db.set_custom(book_id,value,label=label,commit=False) db.commit() print("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) print("5:%s"%(time.time()-self.t)) self.t = time.time() confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''), 'epubmerge_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) print("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum(map(lambda x : x['epub_size'], book_list)) print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize))) if len(book_list) > 100 or totalsize > 5*1024*1024: confirm('\n'+_('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)), 'epubmerge_edited_now_merge_again', self.gui) self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000) mi = db.get_metadata(book_id,index_is_id=True) mergedepub = PersistentTemporaryFile(suffix='.epub') epubstomerge = map(lambda x : x['epub'] , book_list) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') self.do_merge( mergedepub, epubstomerge, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, titlenavpoints=prefs['titlenavpoints'], flattentoc=prefs['flattentoc'], printtimes=True, coverjpgpath=coverjpgpath, keepmetadatafiles=prefs['keepmeta'] ) print("6:%s"%(time.time()-self.t)) print(_("Merge finished, output in:\n%s")%mergedepub.name) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True) print("7:%s"%(time.time()-self.t)) self.t = time.time() self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed(current, self.previous)
def metadata_from_filename(name, pat=None, fallback_pat=None): if isbytestring(name): name = name.decode(filesystem_encoding, "replace") name = name.rpartition(".")[0] mi = MetaInformation(None, None) if pat is None: pat = re.compile(prefs.get("filename_pattern")) name = name.replace("_", " ") match = pat.search(name) if match is None and fallback_pat is not None: match = fallback_pat.search(name) if match is not None: try: mi.title = match.group("title") except IndexError: pass try: au = match.group("author") aus = string_to_authors(au) if aus: mi.authors = aus if prefs["swap_author_names"] and mi.authors: def swap(a): if "," in a: parts = a.split(",", 1) else: parts = a.split(None, 1) if len(parts) > 1: t = parts[-1] parts = parts[:-1] parts.insert(0, t) return " ".join(parts) mi.authors = [swap(x) for x in mi.authors] except (IndexError, ValueError): pass try: mi.series = match.group("series") except IndexError: pass try: si = match.group("series_index") mi.series_index = float(si) except (IndexError, ValueError, TypeError): pass try: si = match.group("isbn") mi.isbn = si except (IndexError, ValueError): pass try: publisher = match.group("publisher") mi.publisher = publisher except (IndexError, ValueError): pass try: pubdate = match.group("published") if pubdate: from calibre.utils.date import parse_only_date mi.pubdate = parse_only_date(pubdate) except: pass if mi.is_null("title"): mi.title = name return mi