def test_epub(version, expected):
    """Build a small ePub in memory and compare its SHA-1 with ``expected``.

    The generated archive is also written to a debug file under ``/tmp``.
    That file is removed only after the digest assertion passes, so a failing
    run leaves it on disk for inspection.
    """
    buffer = io.BytesIO()

    # (path on disk, title shown in the table of contents)
    sources = [
        ('README.md', 'README'),
        ('dawn/__init__.py', 'dawn.py'),
    ]
    # Nested children attached to the synthetic "main section" TOC entry.
    nested_entries = [
        ('sub href', 'sub title'),
        ('sub href2', 'sub title2', [
            ('sub sub href', 'sub sub title'),
        ]),
    ]

    with dawn.open(buffer, mode='w', version=version) as epub:
        epub.meta['creators'] = [dawn.AS('Me', role='author')]
        epub.meta['description'] = dawn.AS('Awesome book')
        epub.meta['titles'] = [dawn.AS('My ePub', lang='en')]

        for href, title in sources:
            with open(href, 'r') as source:
                content = source.read()
            item = epub.writestr(href, content)
            epub.spine.append(item)
            epub.toc.append(href, title=title)

        epub.toc.append('main section', 'main title', nested_entries)

    # The buffer is read only after the writer context has been closed.
    payload = buffer.getvalue()
    dbg = '/tmp/epub{}.epub'.format(version)
    with open(dbg, 'wb') as debug_file:
        debug_file.write(payload)

    digest = hashlib.sha1(payload).hexdigest()
    assert digest == expected, 'Debug file is at {}'.format(dbg)
    os.unlink(dbg)
def test_read(expected):
    """Read an ePub fixture and compare a summary of it with a JSON file.

    ``expected`` is the path of a ``*.expected.json`` file; the ePub path and
    the debug-dump path are derived from it by swapping the suffix. The test
    is skipped when the ePub file does not exist. The debug dump is removed
    only after the comparison succeeds.
    """
    suffix = '.expected.json'
    epub_path = expected.replace(suffix, '.epub')
    debug_path = expected.replace(suffix, '.debug.json')

    if not os.path.exists(epub_path):
        pytest.skip('Missing fixture')

    def _ser_toc_item(entry):
        # Serialize one TOC entry; 'children' is added only when non-empty.
        node = {'href': entry.href, 'title': entry.title}
        if not entry.children:
            return node
        node['children'] = [_ser_toc_item(child) for child in entry.children]
        return node

    with dawn.open(epub_path) as book:
        spine_ids = [item.iid for item in book.spine]
        manifest = {
            key: [item.iid, item.href, item.mimetype]
            for key, item in book.manifest.items()
        }
        toc = [book.toc.title, [_ser_toc_item(entry) for entry in book.toc]]
        meta = {key: repr(value) for key, value in book.meta.items()}
        actual = {
            'uid': repr(book.uid),
            'version': book.version,
            'spine': spine_ids,
            'manifest': manifest,
            'toc': toc,
            'meta': meta,
        }

    with open(debug_path, 'w') as debug_file:
        json.dump(actual, debug_file, indent=4)

    with open(expected, 'r') as expected_file:
        wanted = json.load(expected_file)

    assert actual == wanted
    os.unlink(debug_path)
def dummy():
    """Yield a version 2.0 ePub opened for writing on an in-memory buffer."""
    sink = io.BytesIO()
    with dawn.open(sink, mode='w', version='2.0') as book:
        yield book
def test_missing_version():
    """Opening in write mode without a ``version`` must raise TypeError."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        dawn.open(None, mode='w')
def test_read_with_opfpath():
    """Passing ``opfpath`` together with read mode must raise TypeError."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        dawn.open(None, 'r', opfpath='blih')
def test_wrong_mode():
    """Opening with mode ``'a'`` must raise TypeError."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        dawn.open(None, 'a')
def dummy():
    """Yield the ePub at the module-level ``_dummy`` path, opened for reading.

    Skips the requesting test when that file does not exist.
    """
    fixture_present = os.path.exists(_dummy)
    if not fixture_present:
        pytest.skip('Missing dummy fixture')

    with dawn.open(_dummy) as book:
        yield book
def unpack_epub_file(clusive_user, file, book=None, sort_order=0):
    """
    Process an uploaded EPUB file and return its BookVersion.

    The book will be owned by the given ClusiveUser. If that argument is None,
    it will be created as a public library book.

    If book and sort_order arguments are given, they will be used to locate an
    existing Book and possibly-existing BookVersion objects. For public library
    books, the title is used to look for a matching Book. If there is no
    matching Book or BookVersion, they will be created. If a matching
    BookVersion already exists it will be overwritten only if the modification
    date in the EPUB metadata is newer.

    This method will:
     * unzip the file into the book version's storage directory
     * find metadata
     * create a manifest (written as manifest.json next to the content)
     * make a database record

    It does NOT look for glossary words or parse the text content for
    vocabulary lists; call scan_book for that.

    :param clusive_user: owner of the book, or None for a public library book.
    :param file: filesystem path of the EPUB file.
    :param book: existing Book that this file is a version of, if any.
    :param sort_order: position of this version within the book.
    :return: a tuple (bv, changed) of the BookVersion and a boolean which is
        True if new book content was found. If "changed" is False, the bv is
        an existing one that matches the given file and was not updated.
    :raises BookMismatch: if ``book`` is given and its title differs from the
        title found in the EPUB.
    :raises BookMalformed: if a public book has no title.

    Any other error (such as a non-EPUB file) propagates as an exception.
    """
    with open(file, 'rb') as f, dawn.open(f) as upload:
        manifest = make_manifest(upload)
        title = get_metadata_item(upload, 'titles') or ''
        author = get_metadata_item(upload, 'creators') or ''
        description = get_metadata_item(upload, 'description') or ''
        language = get_metadata_item(upload, 'language') or ''

        # Tolerate a missing or empty 'dates' entry so the fallback below is
        # reached instead of failing with AttributeError on None.
        dates = upload.meta.get('dates') or {}
        mod_date = dates.get('modification') or None

        if mod_date:
            # Date, if provided should be UTC according to spec.
            # make_aware() rejects datetimes that already carry tzinfo, so
            # only localize naive values.
            if timezone.is_naive(mod_date):
                mod_date = timezone.make_aware(mod_date, timezone=timezone.utc)
        else:
            # Many EPUBs are missing this metadata, unfortunately.
            logger.warning('No mod date found in %s', file)
            mod_date = timezone.now()

        if upload.cover:
            cover = adjust_href(upload, upload.cover.href)
            # For cover path, need to prefix this path with the directory
            # holding this version of the book.
            cover = os.path.join(str(sort_order), cover)
        else:
            cover = None

        # Find or create the BOOK.
        if book:
            # Was supplied as an arg... sanity check.
            if book.title != title:
                logger.warning("DB title: '%s', imported title: '%s'",
                               repr(book.title), repr(title))
                raise BookMismatch('Does not appear to be a version of the same book, titles differ.')
        else:
            if not clusive_user:
                # For public books, we require a title, and a book with the
                # same title is assumed to be the same book.
                if not title:
                    raise BookMalformed('Malformed EPUB, no title found')
                book = Book.objects.filter(owner=None, title=title).first()
        if not book:
            # Make new Book
            book = Book(owner=clusive_user,
                        title=title,
                        author=author,
                        description=description,
                        cover=cover)
            book.save()
            logger.debug('Created new book for import: %s', book)

        # Find or create the BOOK VERSION
        book_version = BookVersion.objects.filter(book=book, sortOrder=sort_order).first()
        if book_version:
            logger.info('Existing BV was found')
            if mod_date > book_version.mod_date:
                logger.info('Replacing older content of this book version')
                book_version.mod_date = mod_date
                # Also update metadata that's stored on the book, in case
                # it's changed.
                book.author = author
                book.description = description
                book.cover = cover
                book.save()
            else:
                logger.warning('File %s not imported: already exists with same or newer date', file)
                # Short circuit the import and just return the existing object.
                return book_version, False
        else:
            logger.info('Creating new BV: book=%s, sortOrder=%d', book, sort_order)
            book_version = BookVersion(book=book, sortOrder=sort_order, mod_date=mod_date)

        book_version.filename = basename(file)
        if language:
            book_version.language = language
        book_version.save()

        # Unpack the EPUB file, replacing any content left by an older import.
        storage_dir = book_version.storage_dir
        if os.path.isdir(storage_dir):
            logger.debug('Erasing existing content in %s', storage_dir)
            shutil.rmtree(storage_dir)
        os.makedirs(storage_dir)
        with ZipFile(file) as zf:
            zf.extractall(path=storage_dir)
        with open(os.path.join(storage_dir, 'manifest.json'), 'w') as mf:
            mf.write(json.dumps(manifest, indent=4))
        logger.debug("Unpacked epub into %s", storage_dir)
        return book_version, True