def add_document_to_bookshelf(
    document_or_uri: t.Union[BaseDocument, DocumentUri],
    category_name: str,
    tags_names: list[str],
    should_add_to_fts: bool,
    database_file: t.PathLike,
):
    """Add the given document to the bookshelf database.

    The code shown here handles a document whose URI is already stored:
    when full-text indexing is requested, the number of stored
    ``DocumentFTSIndex`` rows for that record is compared with
    ``len(document)``; a match returns early, a mismatch removes the
    stored record.

    Args:
        document_or_uri: An already opened document, or a ``DocumentUri``
            which is opened with ``create_document``.
        category_name: Presumably the category to file the document
            under; not used in the code shown here -- TODO confirm.
        tags_names: Presumably the tags to attach to the document; not
            used in the code shown here -- TODO confirm.
        should_add_to_fts: When true, an existing record has its index
            row count checked as described above.
        database_file: Presumably the bookshelf database path; not used
            in the code shown here -- TODO confirm.
    """
    # Accept either an opened document or a URI that still has to be opened.
    document = (
        create_document(document_or_uri)
        if isinstance(document_or_uri, DocumentUri)
        else document_or_uri
    )
    # Look for a record that was stored under the same URI.
    if (existing_doc := Document.get_or_none(uri=document.uri)) is not None:
        log.debug("Document already in the database...")
        if should_add_to_fts:
            log.debug("Checking index...")
            # Count the index rows that point at the stored record.
            db_page_count = (
                DocumentFTSIndex.select()
                .where(DocumentFTSIndex.document_id == existing_doc.get_id())
                .count()
            )
            # The index is treated as intact when the row count equals
            # len(document).
            if db_page_count == len(document):
                log.debug("Document index is OK")
                return
            else:
                log.debug("Document index is not well formed. Rebuilding index...")
                # NOTE(review): the nesting level of this call could not be
                # determined with certainty; it is kept in the mismatch
                # branch -- confirm against the intended layout.
                existing_doc.delete_instance()
def test_epub_document_section_at_text_position(asset):
    """Check that text positions in the EPUB 3 spec resolve to the expected section titles."""
    document = create_document(DocumentUri.from_filename(asset("epub30-spec.epub")))
    # (text position, expected section title) pairs, checked in this order.
    expectations = (
        (247743, "1.1. Purpose and Scope"),
        (370161, "3.1.1. HTML5"),
        (127838, "4.3.2. Metadata "),
        (242323, "B.4.1.2. Description"),
        (17556, "Terminology"),
        (34564, "2.6. Rendering and CSS"),
        (349355, "Acknowledgements and Contributors"),
        (363566, "EPUB 3 Changes from EPUB 2.0.1"),
        (371108, "3.1.5. Content Switching"),
        (135534, "4.3.2. Metadata "),
        (130440, "4.3.2. Metadata "),
        (60425, "2.2. Reading System Conformance"),
        (49786, "4.6. Scripting"),
        (278229, "3.5.2. Media Overlays Metadata Vocabulary"),
        (63656, "3.4.1. The "),
        (380720, "4.1.4. Filesystem Container"),
        (173840, "2.1.3.1.3. Vocabulary Association"),
        (25363, "1.2. Roadmap"),
        (114545, "4.2.2. Default Vocabulary"),
        (9227, "EPUB 3 Specifications - Table of Contents"),
    )
    for position, expected_title in expectations:
        found = document.get_section_at_position(position)
        assert found.title == expected_title
def test_serde_toc_tree(asset):
    """Dump and reload the table-of-contents tree and compare it with the source tree."""
    document = create_document(DocumentUri.from_filename(asset("epub30-spec.epub")))
    restored_tree = load_toc_tree(dump_toc_tree(document.toc_tree))
    assert len(document.toc_tree) == len(restored_tree)
    # Walk both trees side by side; every pair of nodes must share a title.
    paired_nodes = zip(
        restored_tree.iter_children(),
        document.toc_tree.iter_children(),
    )
    for restored_node, source_node in paired_nodes:
        assert restored_node.title == source_node.title
def test_wire_serde(asset, library):
    """Send the dumped TOC tree through ``library.dumps``/``library.loads`` and compare titles."""
    document = create_document(DocumentUri.from_filename(asset("epub30-spec.epub")))
    wire_payload = library.dumps(dump_toc_tree(document.toc_tree))
    restored_tree = load_toc_tree(library.loads(wire_payload))
    # Walk both trees side by side; every pair of nodes must share a title.
    paired_nodes = zip(
        restored_tree.iter_children(),
        document.toc_tree.iter_children(),
    )
    for restored_node, source_node in paired_nodes:
        assert restored_node.title == source_node.title
def _import_document(category_name, should_add_to_fts, filename):
    """Import one file into the default bookshelf database, best effort.

    The file is opened with ``create_document`` and passed to
    ``add_document_to_bookshelf`` with no tags; the document is closed
    afterwards via ``contextlib.closing``.

    Args:
        category_name: Forwarded to ``add_document_to_bookshelf``.
        should_add_to_fts: Forwarded to ``add_document_to_bookshelf``.
        filename: Path of the file to import.

    Returns:
        None. Failures are logged and swallowed so one bad file does not
        stop the caller.
    """
    # Imported locally so the block does not rely on a module-level logger.
    import logging

    try:
        uri = DocumentUri.from_filename(filename)
        with contextlib.closing(create_document(uri)) as document:
            add_document_to_bookshelf(
                document,
                category_name,
                tags_names=(),
                should_add_to_fts=should_add_to_fts,
                database_file=DEFAULT_BOOKSHELF_DATABASE_FILE,
            )
    except Exception:
        # Only ordinary errors are swallowed; KeyboardInterrupt and
        # SystemExit must still be able to stop the process.
        logging.getLogger(__name__).exception(
            "Failed to import document: %s", filename
        )
        return
def add_to_bookshelf_view():
    """Handle a request to add a document to the bookshelf.

    Reads the JSON request body, opens the document named by its
    ``document_uri`` entry and submits ``add_document_to_bookshelf`` to
    ``local_bookshelf_process_executor`` with the ``category``, ``tags``,
    ``should_add_to_fts`` and ``database_file`` entries of the body.

    Returns:
        A dict with ``status`` set to ``"OK"`` and the ``document_uri``
        that was submitted.

    The request is aborted with status 400 when the document cannot be
    opened or when it is flagged as internal (``document.__internal__``).
    """
    data = request.json
    doc_uri = data["document_uri"]
    try:
        document = create_document(DocumentUri.from_uri_string(doc_uri))
    except Exception:
        # Only ordinary errors become a 400 response; KeyboardInterrupt and
        # SystemExit are left to propagate. log.exception already records
        # the active traceback.
        log.exception("Failed to open document: %s", doc_uri)
        abort(400, f"Failed to open document: {doc_uri}")
    else:
        if document.__internal__:
            abort(400, f"Document is an internal document: {doc_uri}")
        else:
            # The submitted work is not awaited here; the response is
            # returned right after submission.
            local_bookshelf_process_executor.submit(
                add_document_to_bookshelf,
                document,
                data["category"],
                data["tags"],
                data["should_add_to_fts"],
                data["database_file"],
            )
            return {"status": "OK", "document_uri": doc_uri}
def test_epub_metadata(asset):
    """Check the title and author read from the sample EPUB's metadata."""
    book = create_document(
        DocumentUri.from_filename(asset("The Diary of a Nobody.epub"))
    )
    # (metadata attribute, expected value) pairs, checked in this order.
    expected_fields = (
        ("title", "The Diary of a Nobody"),
        ("author", "George Grossmith"),
    )
    for attribute, expected_value in expected_fields:
        assert getattr(book.metadata, attribute) == expected_value