def process_dir(self, dirpath, filenames, book_id):
    """Record the book stored in one library directory.

    Parses ``metadata.opf`` inside *dirpath* and, when the application id
    stored in the OPF equals *book_id*, appends a record describing the
    book (metadata, OPF mtime, formats, id and paths) to ``self.books``.
    Otherwise *dirpath* is appended to ``self.mismatched_dirs``.

    In both cases the author link map from the OPF is merged into
    ``self.authors_links``: an author's link is stored when none is known
    yet, or when it differs from the stored one and the stored entry's
    timestamp is older than this book's ``mi.timestamp``.

    :param dirpath: directory that holds the book files and metadata.opf
    :param filenames: names of the files found in *dirpath*
    :param book_id: id of the book; converted with ``int()``
    """
    book_id = int(book_id)
    # Build a real list: it is walked three times below, and a lazy
    # filter() object (Python 3) would be exhausted after the first pass,
    # leaving ``sizes`` and ``names`` empty.
    formats = [x for x in filenames if self.is_ebook_file(x)]
    fmts = [os.path.splitext(x)[1][1:].upper() for x in formats]
    sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats]
    names = [os.path.splitext(x)[0] for x in formats]
    opf = os.path.join(dirpath, 'metadata.opf')
    mi = OPF(opf, basedir=dirpath).to_book_metadata()
    timestamp = os.path.getmtime(opf)
    # Stored path is relative to the source library and always uses '/'.
    path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, '/')

    if int(mi.application_id) == book_id:
        self.books.append({
            'mi': mi,
            'timestamp': timestamp,
            'formats': list(zip(fmts, sizes, names)),
            'id': book_id,
            'dirpath': dirpath,
            'path': path,
        })
    else:
        self.mismatched_dirs.append(dirpath)

    alm = mi.get('author_link_map', {})
    # .items() exists on both Python 2 and 3, unlike .iteritems().
    for author, link in alm.items():
        # Use a separate name so the OPF mtime above is not rebound.
        existing_link, link_timestamp = self.authors_links.get(
            author, (None, None))
        if existing_link is None or (
                existing_link != link and link_timestamp < mi.timestamp):
            self.authors_links[author] = (link, mi.timestamp)
def process_dir(self, dirpath, filenames, book_id):
    """Record the book stored in one library directory.

    Parses ``metadata.opf`` inside *dirpath* and, when the application id
    stored in the OPF equals *book_id*, appends a record describing the
    book (metadata, OPF mtime, formats, id and paths) to ``self.books``.
    Otherwise *dirpath* is appended to ``self.mismatched_dirs``.

    In both cases the author link map from the OPF is merged into
    ``self.authors_links``: an author's link is stored when none is known
    yet, or when it differs from the stored one and the stored entry's
    timestamp is older than this book's ``mi.timestamp``.

    :param dirpath: directory that holds the book files and metadata.opf
    :param filenames: names of the files found in *dirpath*
    :param book_id: id of the book; converted with ``int()``
    """
    book_id = int(book_id)
    # Build a real list: it is walked three times below, and a lazy
    # filter() object (Python 3) would be exhausted after the first pass,
    # leaving ``sizes`` and ``names`` empty.
    formats = [x for x in filenames if self.is_ebook_file(x)]
    fmts = [os.path.splitext(x)[1][1:].upper() for x in formats]
    sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats]
    names = [os.path.splitext(x)[0] for x in formats]
    opf = os.path.join(dirpath, 'metadata.opf')
    mi = OPF(opf, basedir=dirpath).to_book_metadata()
    timestamp = os.path.getmtime(opf)
    # Stored path is relative to the source library and always uses '/'.
    path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, '/')

    if int(mi.application_id) == book_id:
        self.books.append({
            'mi': mi,
            'timestamp': timestamp,
            'formats': list(zip(fmts, sizes, names)),
            'id': book_id,
            'dirpath': dirpath,
            'path': path,
        })
    else:
        self.mismatched_dirs.append(dirpath)

    alm = mi.get('author_link_map', {})
    # .items() exists on both Python 2 and 3, unlike .iteritems().
    for author, link in alm.items():
        # Use a separate name so the OPF mtime above is not rebound.
        existing_link, link_timestamp = self.authors_links.get(
            author, (None, None))
        if existing_link is None or (
                existing_link != link and link_timestamp < mi.timestamp):
            self.authors_links[author] = (link, mi.timestamp)
def test_against_opf2(self):  # {{{
    """Check that the OPF 3 reader/writer agrees with the OPF 2 reader.

    The same document is read once through ``OPF(...).to_book_metadata()``
    and once, after its ``version`` attribute is set to ``3.0``, through
    ``read_metadata``; both results must compare equal. The metadata is
    then written back with ``apply_metadata`` using several option
    combinations and re-read to check the round trip.
    """
    # The JSON payloads live inside XML attribute values, so every '"',
    # '<', '>' and '&' in them must be written as an entity; a raw quote
    # would end the attribute and make the document malformed. The
    # JSON-escaped quote in the #commetns value is spelled '\\&quot;' so
    # the Python literal keeps the backslash.
    # opf2 {{{
    raw = '''<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:identifier opf:scheme="calibre" id="calibre_id">1698</dc:identifier>
        <dc:identifier opf:scheme="uuid" id="uuid_id">27106d11-0721-44bc-bcdd-2840f31aaec0</dc:identifier>
        <dc:title>DOCX Demo</dc:title>
        <dc:creator opf:file-as="Goyal, Kovid" opf:role="aut">Kovid Goyal</dc:creator>
        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (2.57.1) [http://calibre-ebook.com]</dc:contributor>
        <dc:date>2016-02-17T10:53:08+00:00</dc:date>
        <dc:description>Demonstration of DOCX support in calibre</dc:description>
        <dc:publisher>Kovid Goyal</dc:publisher>
        <dc:identifier opf:scheme="K">xxx</dc:identifier>
        <dc:language>eng</dc:language>
        <dc:subject>calibre</dc:subject>
        <dc:subject>conversion</dc:subject>
        <dc:subject>docs</dc:subject>
        <dc:subject>ebook</dc:subject>
        <meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
        <meta content="Demos" name="calibre:series"/>
        <meta content="1" name="calibre:series_index"/>
        <meta content="10" name="calibre:rating"/>
        <meta content="2015-12-11T16:28:36+00:00" name="calibre:timestamp"/>
        <meta content="DOCX Demo" name="calibre:title_sort"/>
        <meta content="{&quot;crew.crow&quot;: [], &quot;crew.moose&quot;: [], &quot;crew&quot;: []}" name="calibre:user_categories"/>
        <meta name="calibre:user_metadata:#number" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Number&quot;, &quot;rec_index&quot;: 29, &quot;#extra#&quot;: null, &quot;colnum&quot;: 12, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;number_format&quot;: null}, &quot;search_terms&quot;: [&quot;#number&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;int&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: 31, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;number&quot;, &quot;table&quot;: &quot;custom_column_12&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#genre" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Genre&quot;, &quot;rec_index&quot;: 26, &quot;#extra#&quot;: null, &quot;colnum&quot;: 9, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;use_decorations&quot;: 0}, &quot;search_terms&quot;: [&quot;#genre&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;Demos&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;genre&quot;, &quot;table&quot;: &quot;custom_column_9&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#commetns" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Comments&quot;, &quot;rec_index&quot;: 23, &quot;#extra#&quot;: null, &quot;colnum&quot;: 13, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#commetns&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;comments&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra &lt;font color=\\&quot;#aa0000\\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;, &quot;table&quot;: &quot;custom_column_13&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#formats" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Formats&quot;, &quot;rec_index&quot;: 25, &quot;#extra#&quot;: null, &quot;colnum&quot;: 4, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;composite_template&quot;: &quot;{formats}&quot;, &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;: &quot;text&quot;, &quot;make_category&quot;: false}, &quot;search_terms&quot;: [&quot;#formats&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;AZW3, DOCX, EPUB&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;formats&quot;, &quot;table&quot;: &quot;custom_column_4&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#rating" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Rating&quot;, &quot;rec_index&quot;: 30, &quot;#extra#&quot;: null, &quot;colnum&quot;: 1, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#rating&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;rating&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: 10, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;rating&quot;, &quot;table&quot;: &quot;custom_column_1&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#series" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Series2&quot;, &quot;rec_index&quot;: 31, &quot;#extra#&quot;: 1.0, &quot;colnum&quot;: 5, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#series&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;series&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;s&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;series&quot;, &quot;table&quot;: &quot;custom_column_5&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#tags" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Tags&quot;, &quot;rec_index&quot;: 33, &quot;#extra#&quot;: null, &quot;colnum&quot;: 11, &quot;is_multiple2&quot;: {&quot;ui_to_list&quot;: &quot;,&quot;, &quot;cache_to_list&quot;: &quot;|&quot;, &quot;list_to_ui&quot;: &quot;, &quot;}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;is_names&quot;: false, &quot;description&quot;: &quot;A tag like column for me&quot;}, &quot;search_terms&quot;: [&quot;#tags&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: [&quot;t1&quot;, &quot;t2&quot;], &quot;is_custom&quot;: true, &quot;label&quot;: &quot;tags&quot;, &quot;table&quot;: &quot;custom_column_11&quot;, &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#yesno" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Yes/No&quot;, &quot;rec_index&quot;: 34, &quot;#extra#&quot;: null, &quot;colnum&quot;: 7, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#yesno&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;bool&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: false, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;yesno&quot;, &quot;table&quot;: &quot;custom_column_7&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#myenum" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Enum&quot;, &quot;rec_index&quot;: 28, &quot;#extra#&quot;: null, &quot;colnum&quot;: 6, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;enum_colors&quot;: [], &quot;enum_values&quot;: [&quot;One&quot;, &quot;Two&quot;, &quot;Three&quot;], &quot;use_decorations&quot;: 0}, &quot;search_terms&quot;: [&quot;#myenum&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;enumeration&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;Two&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;myenum&quot;, &quot;table&quot;: &quot;custom_column_6&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#isbn" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;ISBN&quot;, &quot;rec_index&quot;: 27, &quot;#extra#&quot;: null, &quot;colnum&quot;: 3, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;composite_template&quot;: &quot;{identifiers:select(isbn)}&quot;, &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;: &quot;text&quot;, &quot;make_category&quot;: false}, &quot;search_terms&quot;: [&quot;#isbn&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;isbn&quot;, &quot;table&quot;: &quot;custom_column_3&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#authors" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Authors&quot;, &quot;rec_index&quot;: 22, &quot;#extra#&quot;: null, &quot;colnum&quot;: 10, &quot;is_multiple2&quot;: {&quot;ui_to_list&quot;: &quot;&amp;&quot;, &quot;cache_to_list&quot;: &quot;|&quot;, &quot;list_to_ui&quot;: &quot; &amp; &quot;}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;is_names&quot;: true}, &quot;search_terms&quot;: [&quot;#authors&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: [&quot;calibre, Kovid Goyal&quot;], &quot;is_custom&quot;: true, &quot;label&quot;: &quot;authors&quot;, &quot;table&quot;: &quot;custom_column_10&quot;, &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#date" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Date&quot;, &quot;rec_index&quot;: 24, &quot;#extra#&quot;: null, &quot;colnum&quot;: 2, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;date_format&quot;: &quot;dd-MM-yyyy&quot;, &quot;description&quot;: &quot;&quot;}, &quot;search_terms&quot;: [&quot;#date&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;datetime&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: {&quot;__value__&quot;: &quot;2016-02-17T10:54:15+00:00&quot;, &quot;__class__&quot;: &quot;datetime.datetime&quot;}, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;date&quot;, &quot;table&quot;: &quot;custom_column_2&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
    </metadata><manifest><item href="start.html" media-type="text/html" id="m1"/></manifest><spine><itemref idref="m1"/></spine>
</package>'''  # }}}

    def compare_metadata(mi2, mi3):
        # User (custom column) metadata first, then every standard field.
        self.ae(mi2.get_all_user_metadata(False), mi3.get_all_user_metadata(False))
        for field in ALL_METADATA_FIELDS:
            # Compare against whole field names; a substring test on
            # 'manifest spine' would also skip any field whose name
            # merely occurs inside that string.
            if field not in ('manifest', 'spine'):
                v2, v3 = getattr(mi2, field, None), getattr(mi3, field, None)
                self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))

    # Read the same document through the OPF 2 and the OPF 3 code paths.
    mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
    root = etree.fromstring(raw)
    root.set('version', '3.0')
    mi3, _, raster_cover, first_spine_item = read_metadata(
        root, return_extra_data=True)
    self.assertIsNone(raster_cover)
    self.ae('start.html', first_spine_item)
    compare_metadata(mi2, mi3)

    # Round trip: write the metadata back, read it again.
    apply_metadata(root, mi3, force_identifiers=True)
    nmi = read_metadata(root)
    compare_metadata(mi3, nmi)

    # Empty values applied without apply_null: the assertions below expect
    # the tags and comments from the original document to still be read
    # back, while the explicit 0 for #number is stored.
    mi3.tags = []
    mi3.set('#tags', [])
    mi3.set('#number', 0)
    mi3.set('#commetns', '')
    apply_metadata(root, mi3, update_timestamp=True)
    # No attribute called "name" may remain anywhere in the tree.
    self.assertFalse(root.xpath('//*/@name'))
    nmi = read_metadata(root)
    self.assertEqual(mi2.tags, nmi.tags)
    self.assertEqual(mi2.get('#tags'), nmi.get('#tags'))
    self.assertEqual(mi2.get('#commetns'), nmi.get('#commetns'))
    self.assertEqual(0, nmi.get('#number'))

    # With apply_null=True the same empty values are expected to read
    # back as empty.
    apply_metadata(root, mi3, apply_null=True)
    nmi = read_metadata(root)
    self.assertFalse(nmi.tags)
    self.assertFalse(nmi.get('#tags'))
    self.assertFalse(nmi.get('#commetns'))

    # Cover handling: None is expected with add_missing_cover=False, and
    # the cover path under the given prefix otherwise.
    self.assertIsNone(apply_metadata(
        root, mi3, cover_data=b'x', cover_prefix='xxx', add_missing_cover=False))
    self.ae('xxx/cover.jpg', apply_metadata(
        root, mi3, cover_data=b'x', cover_prefix='xxx'))
def test_against_opf2(self):  # {{{
    """Check that the OPF 3 reader/writer agrees with the OPF 2 reader.

    The same document is read once through ``OPF(...).to_book_metadata()``
    and once, after its ``version`` attribute is set to ``3.0``, through
    ``read_metadata``; both results must compare equal. The metadata is
    then written back with ``apply_metadata`` using several option
    combinations and re-read to check the round trip.
    """
    # The JSON payloads live inside XML attribute values, so every '"',
    # '<', '>' and '&' in them must be written as an entity; a raw quote
    # would end the attribute and make the document malformed. The
    # JSON-escaped quote in the #commetns value is spelled '\\&quot;' so
    # the Python literal keeps the backslash.
    # opf2 {{{
    raw = '''<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:identifier opf:scheme="calibre" id="calibre_id">1698</dc:identifier>
        <dc:identifier opf:scheme="uuid" id="uuid_id">27106d11-0721-44bc-bcdd-2840f31aaec0</dc:identifier>
        <dc:title>DOCX Demo</dc:title>
        <dc:creator opf:file-as="Goyal, Kovid" opf:role="aut">Kovid Goyal</dc:creator>
        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (2.57.1) [http://calibre-ebook.com]</dc:contributor>
        <dc:date>2016-02-17T10:53:08+00:00</dc:date>
        <dc:description>Demonstration of DOCX support in calibre</dc:description>
        <dc:publisher>Kovid Goyal</dc:publisher>
        <dc:identifier opf:scheme="K">xxx</dc:identifier>
        <dc:language>eng</dc:language>
        <dc:subject>calibre</dc:subject>
        <dc:subject>conversion</dc:subject>
        <dc:subject>docs</dc:subject>
        <dc:subject>ebook</dc:subject>
        <meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
        <meta content="Demos" name="calibre:series"/>
        <meta content="1" name="calibre:series_index"/>
        <meta content="10" name="calibre:rating"/>
        <meta content="2015-12-11T16:28:36+00:00" name="calibre:timestamp"/>
        <meta content="DOCX Demo" name="calibre:title_sort"/>
        <meta content="{&quot;crew.crow&quot;: [], &quot;crew.moose&quot;: [], &quot;crew&quot;: []}" name="calibre:user_categories"/>
        <meta name="calibre:user_metadata:#number" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Number&quot;, &quot;rec_index&quot;: 29, &quot;#extra#&quot;: null, &quot;colnum&quot;: 12, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;number_format&quot;: null}, &quot;search_terms&quot;: [&quot;#number&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;int&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: 31, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;number&quot;, &quot;table&quot;: &quot;custom_column_12&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#genre" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Genre&quot;, &quot;rec_index&quot;: 26, &quot;#extra#&quot;: null, &quot;colnum&quot;: 9, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;use_decorations&quot;: 0}, &quot;search_terms&quot;: [&quot;#genre&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;Demos&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;genre&quot;, &quot;table&quot;: &quot;custom_column_9&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#commetns" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Comments&quot;, &quot;rec_index&quot;: 23, &quot;#extra#&quot;: null, &quot;colnum&quot;: 13, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#commetns&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;comments&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra &lt;font color=\\&quot;#aa0000\\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;, &quot;table&quot;: &quot;custom_column_13&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#formats" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Formats&quot;, &quot;rec_index&quot;: 25, &quot;#extra#&quot;: null, &quot;colnum&quot;: 4, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;composite_template&quot;: &quot;{formats}&quot;, &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;: &quot;text&quot;, &quot;make_category&quot;: false}, &quot;search_terms&quot;: [&quot;#formats&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;AZW3, DOCX, EPUB&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;formats&quot;, &quot;table&quot;: &quot;custom_column_4&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#rating" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Rating&quot;, &quot;rec_index&quot;: 30, &quot;#extra#&quot;: null, &quot;colnum&quot;: 1, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#rating&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;rating&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: 10, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;rating&quot;, &quot;table&quot;: &quot;custom_column_1&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#series" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Series2&quot;, &quot;rec_index&quot;: 31, &quot;#extra#&quot;: 1.0, &quot;colnum&quot;: 5, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#series&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;series&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;s&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;series&quot;, &quot;table&quot;: &quot;custom_column_5&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#tags" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Tags&quot;, &quot;rec_index&quot;: 33, &quot;#extra#&quot;: null, &quot;colnum&quot;: 11, &quot;is_multiple2&quot;: {&quot;ui_to_list&quot;: &quot;,&quot;, &quot;cache_to_list&quot;: &quot;|&quot;, &quot;list_to_ui&quot;: &quot;, &quot;}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;is_names&quot;: false, &quot;description&quot;: &quot;A tag like column for me&quot;}, &quot;search_terms&quot;: [&quot;#tags&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: [&quot;t1&quot;, &quot;t2&quot;], &quot;is_custom&quot;: true, &quot;label&quot;: &quot;tags&quot;, &quot;table&quot;: &quot;custom_column_11&quot;, &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#yesno" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Yes/No&quot;, &quot;rec_index&quot;: 34, &quot;#extra#&quot;: null, &quot;colnum&quot;: 7, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;: [&quot;#yesno&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;bool&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: false, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;yesno&quot;, &quot;table&quot;: &quot;custom_column_7&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#myenum" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Enum&quot;, &quot;rec_index&quot;: 28, &quot;#extra#&quot;: null, &quot;colnum&quot;: 6, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;enum_colors&quot;: [], &quot;enum_values&quot;: [&quot;One&quot;, &quot;Two&quot;, &quot;Three&quot;], &quot;use_decorations&quot;: 0}, &quot;search_terms&quot;: [&quot;#myenum&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;enumeration&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;Two&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;myenum&quot;, &quot;table&quot;: &quot;custom_column_6&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#isbn" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;ISBN&quot;, &quot;rec_index&quot;: 27, &quot;#extra#&quot;: null, &quot;colnum&quot;: 3, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;composite_template&quot;: &quot;{identifiers:select(isbn)}&quot;, &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;: &quot;text&quot;, &quot;make_category&quot;: false}, &quot;search_terms&quot;: [&quot;#isbn&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: &quot;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;isbn&quot;, &quot;table&quot;: &quot;custom_column_3&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#authors" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Authors&quot;, &quot;rec_index&quot;: 22, &quot;#extra#&quot;: null, &quot;colnum&quot;: 10, &quot;is_multiple2&quot;: {&quot;ui_to_list&quot;: &quot;&amp;&quot;, &quot;cache_to_list&quot;: &quot;|&quot;, &quot;list_to_ui&quot;: &quot; &amp; &quot;}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;is_names&quot;: true}, &quot;search_terms&quot;: [&quot;#authors&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: [&quot;calibre, Kovid Goyal&quot;], &quot;is_custom&quot;: true, &quot;label&quot;: &quot;authors&quot;, &quot;table&quot;: &quot;custom_column_10&quot;, &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#date" content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;: &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Date&quot;, &quot;rec_index&quot;: 24, &quot;#extra#&quot;: null, &quot;colnum&quot;: 2, &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {&quot;date_format&quot;: &quot;dd-MM-yyyy&quot;, &quot;description&quot;: &quot;&quot;}, &quot;search_terms&quot;: [&quot;#date&quot;], &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;datetime&quot;, &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: {&quot;__value__&quot;: &quot;2016-02-17T10:54:15+00:00&quot;, &quot;__class__&quot;: &quot;datetime.datetime&quot;}, &quot;is_custom&quot;: true, &quot;label&quot;: &quot;date&quot;, &quot;table&quot;: &quot;custom_column_2&quot;, &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
    </metadata><manifest><item href="start.html" media-type="text/html" id="m1"/></manifest><spine><itemref idref="m1"/></spine>
</package>'''  # }}}

    def compare_metadata(mi2, mi3):
        # User (custom column) metadata first, then every standard field.
        self.ae(mi2.get_all_user_metadata(False), mi3.get_all_user_metadata(False))
        for field in ALL_METADATA_FIELDS:
            # Compare against whole field names; a substring test on
            # 'manifest spine' would also skip any field whose name
            # merely occurs inside that string.
            if field not in ('manifest', 'spine'):
                v2, v3 = getattr(mi2, field, None), getattr(mi3, field, None)
                self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))

    # Read the same document through the OPF 2 and the OPF 3 code paths.
    mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
    root = etree.fromstring(raw)
    root.set('version', '3.0')
    mi3, _, raster_cover, first_spine_item = read_metadata(
        root, return_extra_data=True)
    self.assertIsNone(raster_cover)
    self.ae('start.html', first_spine_item)
    compare_metadata(mi2, mi3)

    # Round trip: write the metadata back, read it again.
    apply_metadata(root, mi3, force_identifiers=True)
    nmi = read_metadata(root)
    compare_metadata(mi3, nmi)

    # Empty values applied without apply_null: the assertions below expect
    # the tags and comments from the original document to still be read
    # back, while the explicit 0 for #number is stored.
    mi3.tags = []
    mi3.set('#tags', [])
    mi3.set('#number', 0)
    mi3.set('#commetns', '')
    apply_metadata(root, mi3, update_timestamp=True)
    # No attribute called "name" may remain anywhere in the tree.
    self.assertFalse(root.xpath('//*/@name'))
    nmi = read_metadata(root)
    self.assertEqual(mi2.tags, nmi.tags)
    self.assertEqual(mi2.get('#tags'), nmi.get('#tags'))
    self.assertEqual(mi2.get('#commetns'), nmi.get('#commetns'))
    self.assertEqual(0, nmi.get('#number'))

    # With apply_null=True the same empty values are expected to read
    # back as empty.
    apply_metadata(root, mi3, apply_null=True)
    nmi = read_metadata(root)
    self.assertFalse(nmi.tags)
    self.assertFalse(nmi.get('#tags'))
    self.assertFalse(nmi.get('#commetns'))

    # Cover handling: None is expected with add_missing_cover=False, and
    # the cover path under the given prefix otherwise.
    self.assertIsNone(apply_metadata(
        root, mi3, cover_data=b'x', cover_prefix='xxx', add_missing_cover=False))
    self.ae('xxx/cover.jpg', apply_metadata(
        root, mi3, cover_data=b'x', cover_prefix='xxx'))