def setUp(self): self.tdir = PersistentTemporaryDirectory('_calibre_dbtest') self.db = LibraryDatabase2(self.tdir) f = open(os.path.join(self.tdir, 'test.txt'), 'w+b') f.write('test') paths = list(repeat(f, 3)) formats = list(repeat('txt', 3)) m1 = MetaInformation('Test Ebook 1', ['Test Author 1']) m1.tags = ['tag1', 'tag2'] m1.publisher = 'Test Publisher 1' m1.rating = 2 m1.series = 'Test Series 1' m1.series_index = 3 m1.author_sort = 'as1' m1.isbn = 'isbn1' m1.cover_data = ('jpg', self.img) m2 = MetaInformation('Test Ebook 2', ['Test Author 2']) m2.tags = ['tag3', 'tag4'] m2.publisher = 'Test Publisher 2' m2.rating = 3 m2.series = 'Test Series 2' m2.series_index = 1 m2.author_sort = 'as1' m2.isbn = 'isbn1' self.db.add_books(paths, formats, [m1, m2, m2], add_duplicates=True) self.m1, self.m2 = m1, m2
def do_set_metadata(opts, mi, stream, stream_type): mi = MetaInformation(mi) for x in ('guide', 'toc', 'manifest', 'spine'): setattr(mi, x, None) from_opf = getattr(opts, 'from_opf', None) if from_opf is not None: from calibre.ebooks.metadata.opf2 import OPF opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata() mi.smart_update(opf_mi) for pref in config().option_set.preferences: if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort', 'author_sort', 'get_cover', 'cover', 'tags', 'lrf_bookid', 'identifiers'): continue val = getattr(opts, pref.name, None) if val is not None: setattr(mi, pref.name, val) if getattr(opts, 'authors', None) is not None: mi.authors = string_to_authors(opts.authors) mi.author_sort = authors_to_sort_string(mi.authors) if getattr(opts, 'author_sort', None) is not None: mi.author_sort = opts.author_sort if getattr(opts, 'title_sort', None) is not None: mi.title_sort = opts.title_sort elif getattr(opts, 'title', None) is not None: mi.title_sort = title_sort(opts.title) if getattr(opts, 'tags', None) is not None: mi.tags = [t.strip() for t in opts.tags.split(',')] if getattr(opts, 'series', None) is not None: mi.series = opts.series.strip() if getattr(opts, 'series_index', None) is not None: mi.series_index = float(opts.series_index.strip()) if getattr(opts, 'pubdate', None) is not None: mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False) if getattr(opts, 'identifiers', None): val = { k.strip(): v.strip() for k, v in (x.partition(':')[0::2] for x in opts.identifiers) } if val: orig = mi.get_identifiers() orig.update(val) val = {k: v for k, v in orig.iteritems() if k and v} mi.set_identifiers(val) if getattr(opts, 'cover', None) is not None: ext = os.path.splitext(opts.cover)[1].replace('.', '').upper() mi.cover_data = (ext, open(opts.cover, 'rb').read()) with force_identifiers: set_metadata(stream, mi, stream_type)
def do_set_metadata(opts, mi, stream, stream_type): mi = MetaInformation(mi) for x in ('guide', 'toc', 'manifest', 'spine'): setattr(mi, x, None) from_opf = getattr(opts, 'from_opf', None) if from_opf is not None: from calibre.ebooks.metadata.opf2 import OPF opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata() mi.smart_update(opf_mi) for pref in config().option_set.preferences: if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort', 'author_sort', 'get_cover', 'cover', 'tags', 'lrf_bookid', 'identifiers'): continue val = getattr(opts, pref.name, None) if val is not None: setattr(mi, pref.name, val) if getattr(opts, 'authors', None) is not None: mi.authors = string_to_authors(opts.authors) mi.author_sort = authors_to_sort_string(mi.authors) if getattr(opts, 'author_sort', None) is not None: mi.author_sort = opts.author_sort if getattr(opts, 'title_sort', None) is not None: mi.title_sort = opts.title_sort elif getattr(opts, 'title', None) is not None: mi.title_sort = title_sort(opts.title) if getattr(opts, 'tags', None) is not None: mi.tags = [t.strip() for t in opts.tags.split(',')] if getattr(opts, 'series', None) is not None: mi.series = opts.series.strip() if getattr(opts, 'series_index', None) is not None: mi.series_index = float(opts.series_index.strip()) if getattr(opts, 'pubdate', None) is not None: mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False) if getattr(opts, 'identifiers', None): val = {k.strip():v.strip() for k, v in (x.partition(':')[0::2] for x in opts.identifiers)} if val: orig = mi.get_identifiers() orig.update(val) val = {k:v for k, v in iteritems(orig) if k and v} mi.set_identifiers(val) if getattr(opts, 'cover', None) is not None: ext = os.path.splitext(opts.cover)[1].replace('.', '').upper() mi.cover_data = (ext, open(opts.cover, 'rb').read()) with force_identifiers: set_metadata(stream, mi, stream_type)
def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) content = zin.read('meta.xml') parser.parse(StringIO(content)) data = odfs.seenfields mi = MetaInformation(None, []) if 'title' in data: mi.title = data['title'] if data.get('initial-creator', '').strip(): mi.authors = string_to_authors(data['initial-creator']) elif 'creator' in data: mi.authors = string_to_authors(data['creator']) if 'description' in data: mi.comments = data['description'] if 'language' in data: mi.language = data['language'] if data.get('keywords', ''): mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata','') == 'true': # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except ValueError: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', 'false') == 'true' if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) except: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) content = zin.read('meta.xml') parser.parse(StringIO(content)) data = odfs.seenfields mi = MetaInformation(None, []) if data.has_key('title'): mi.title = data['title'] if data.get('initial-creator', '').strip(): mi.authors = string_to_authors(data['initial-creator']) elif data.has_key('creator'): mi.authors = string_to_authors(data['creator']) if data.has_key('description'): mi.comments = data['description'] if data.has_key('language'): mi.language = data['language'] if data.get('keywords', ''): mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata','') == 'true': # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except ValueError: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', 'false') == 'true' if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) except: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip() + ', ' + lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip()+', '+lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(self): title, authors = self.get_title_and_authors() mi = MetaInformation(title, authors) publisher = str(self.publisher.text()).strip() if publisher: mi.publisher = publisher author_sort = str(self.author_sort.text()).strip() if author_sort: mi.author_sort = author_sort comments = self.comment.html if comments: mi.comments = comments mi.series_index = float(self.series_index.value()) series = str(self.series.currentText()).strip() if series: mi.series = series tags = [t.strip() for t in str(self.tags.text()).strip().split(',')] if tags: mi.tags = tags return mi
def get_metadata(self): title, authors = self.get_title_and_authors() mi = MetaInformation(title, authors) publisher = unicode(self.publisher.text()).strip() if publisher: mi.publisher = publisher author_sort = unicode(self.author_sort.text()).strip() if author_sort: mi.author_sort = author_sort comments = self.comment.html if comments: mi.comments = comments mi.series_index = float(self.series_index.value()) series = unicode(self.series.currentText()).strip() if series: mi.series = series tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')] if tags: mi.tags = tags return mi
def get_metadata(stream, extract_cover=True): whitespace = re.compile(r'\s+') def normalize(s): return whitespace.sub(' ', s).strip() with ZipFile(stream) as zf: meta = zf.read('meta.xml') root = fromstring(meta) def find(field): ns, tag = fields[field] ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns}) if ans: return normalize( tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip() mi = MetaInformation(None, []) title = find('title') if title: mi.title = title creator = find('initial-creator') or find('creator') if creator: mi.authors = string_to_authors(creator) desc = find('description') if desc: mi.comments = desc lang = find('language') if lang and canonicalize_lang(lang): mi.languages = [canonicalize_lang(lang)] kw = find('keyword') or find('keywords') if kw: mi.tags = [x.strip() for x in kw.split(',') if x.strip()] data = {} for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}): name = (tag.get('{%s}name' % METANS) or '').lower() vtype = tag.get('{%s}value-type' % METANS) or 'string' val = tag.text if name and val: if vtype == 'boolean': val = val == 'true' data[name] = val opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata'): # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.identifiers'): try: mi.identifiers = json.loads(data['opf.identifiers']) except Exception: pass if data.get('opf.rating'): try: mi.rating = max(0, min(float(data['opf.rating']), 10)) except Exception: pass if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except Exception: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', False) if not opfnocover: try: read_cover(stream, zf, mi, opfmeta, extract_cover) except Exception: pass # Do not let an error reading the cover prevent reading other data return mi