def get_metadata(stream, extract_cover=True):
    '''
    Read metadata from the ``meta.xml`` member of the zip archive in `stream`.

    :param stream: file-like object passed to ``zipfile.ZipFile``.
    :param extract_cover: forwarded unchanged to ``read_cover``.
    :return: a ``MetaInformation`` object filled from the parsed fields.
    '''
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # Prefer the initial creator; fall back to the last creator.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Read metadata from the ``meta.xml`` member of the zip archive in `stream`.

    :param stream: file-like object passed to ``zipfile.ZipFile``.
    :param extract_cover: forwarded unchanged to ``read_cover``.
    :return: a ``MetaInformation`` object filled from the parsed fields.
    '''
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])

    # Membership is tested with `in`: dict.has_key() is gone in Python 3.
    if 'title' in data:
        mi.title = data['title']
    initial_creator = data.get('initial-creator', '')
    if initial_creator.strip():
        mi.authors = string_to_authors(initial_creator)
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    keywords = data.get('keywords', '')
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',') if x.strip()]

    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'

    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data; KeyboardInterrupt/SystemExit are deliberately not caught.
            pass
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The first section (index 1 onwards) whose header type equals
    DATATYPE_METADATA is scanned for charset, author, title and publication
    date records. When no such section exists, the default "Unknown"
    metadata is returned. `extract_cover` is not used here.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    pheader = PdbHeaderReader(stream)

    # Locate the metadata section; its first 8 bytes are dropped.
    section_data = None
    for section_index in range(1, pheader.num_sections):
        raw_data = pheader.section_data(section_index)
        if SectionHeader(raw_data).type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break
    if not section_data:
        return mi

    default_encoding = 'latin-1'
    title = author = None
    pubdate = 0
    offset = 0
    record_count, = struct.unpack('>H', section_data[0:2])
    for _unused in range(record_count):
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, 2 + offset)
        except struct.error:
            break
        payload = 6 + offset
        if rec_type == 1:
            # CharSet
            mib, = struct.unpack('>H', section_data[payload:payload + 2])
            default_encoding = MIBNUM_TO_NAME.get(mib, 'latin-1')
        elif rec_type == 4:
            # Author
            # NOTE(review): this indexes a single element rather than
            # slicing the record payload -- confirm against the format spec.
            author = section_data[payload + (2 * length)]
        elif rec_type == 5:
            # Title
            # NOTE(review): same single-element indexing as for the author.
            title = section_data[payload + (2 * length)]
        elif rec_type == 6:
            # Publication Date
            pubdate, = struct.unpack('>I', section_data[payload:payload + 4])
        offset += 2 * length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        # NOTE(review): assigns `author`, not `authors` -- verify intent.
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    Scans the sections of the PDB container in `stream` (starting at index
    1) for one whose header type is DATATYPE_METADATA and reads charset,
    author, title and publication date records from it. If no metadata
    section is found the default "Unknown" metadata is returned.
    `extract_cover` is accepted but not used by this function.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    # range() instead of xrange(): xrange does not exist on Python 3 and
    # record_count is a 16-bit value, so the list built on Python 2 is small.
    for _index in range(record_count):
        try:
            # `record_type` rather than `type`, to avoid shadowing the builtin.
            record_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if record_type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif record_type == 4:
            # NOTE(review): single-element index, not a slice of the record
            # payload -- confirm against the format specification.
            author = section_data[6+adv+(2*length)]
        # Title
        elif record_type == 5:
            # NOTE(review): same single-element indexing as for the author.
            title = section_data[6+adv+(2*length)]
        # Publication Date
        elif record_type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        # NOTE(review): assigns `author`, not `authors` -- verify intent.
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Apply command-line options to `mi` and write the result into `stream`.

    :param opts: options object; each attribute is optional and read with
        ``getattr(opts, name, None)``.
    :param mi: source metadata; a new ``MetaInformation`` is built from it.
    :param stream: file-like object handed to ``set_metadata``.
    :param stream_type: format identifier handed to ``set_metadata``.
    '''
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Context manager so the OPF file handle is always closed.
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options without special handling are copied verbatim onto mi.
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author_sort overrides the one derived from authors.
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry is "key:value"; everything after the first colon is
        # the value.
        val = {
            k.strip(): v.strip()
            for k, v in (x.partition(':')[0::2] for x in opts.identifiers)
        }
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # .items() works on Python 2 and 3 (.iteritems() is 2-only).
            # Entries with an empty key or value are dropped.
            val = {k: v for k, v in orig.items() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Apply command-line options to `mi` and write the result into `stream`.

    :param opts: options object; each attribute is optional and read with
        ``getattr(opts, name, None)``.
    :param mi: source metadata; a new ``MetaInformation`` is built from it.
    :param stream: file-like object handed to ``set_metadata``.
    :param stream_type: format identifier handed to ``set_metadata``.
    '''
    special_cased = (
        'to_opf', 'from_opf', 'authors', 'title_sort', 'author_sort',
        'get_cover', 'cover', 'tags', 'lrf_bookid', 'identifiers',
    )

    def option(name):
        # Missing options are treated the same as options set to None.
        return getattr(opts, name, None)

    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = option('from_opf')
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Close the OPF file deterministically instead of leaking the handle.
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options without special handling are copied verbatim onto mi.
    for pref in config().option_set.preferences:
        if pref.name in special_cased:
            continue
        val = option(pref.name)
        if val is not None:
            setattr(mi, pref.name, val)

    if option('authors') is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author_sort overrides the one derived from authors.
    if option('author_sort') is not None:
        mi.author_sort = opts.author_sort
    if option('title_sort') is not None:
        mi.title_sort = opts.title_sort
    elif option('title') is not None:
        mi.title_sort = title_sort(opts.title)
    if option('tags') is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if option('series') is not None:
        mi.series = opts.series.strip()
    if option('series_index') is not None:
        mi.series_index = float(opts.series_index.strip())
    if option('pubdate') is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if option('identifiers'):
        # Each entry is "key:value"; everything after the first colon is
        # the value.
        val = {k.strip(): v.strip() for k, v in
               (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # Entries with an empty key or value are dropped.
            val = {k: v for k, v in iteritems(orig) if k and v}
            mi.set_identifiers(val)

    if option('cover') is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def populate(self, entries, browser, verbose=False, api_key=''):
    '''
    Build one MetaInformation per feed entry and append it to `self`.

    For every entry the detail URL is fetched through `browser`; when that
    fails, the fields are taken from the summary entry instead.

    :param entries: iterable of feed entry elements.
    :param browser: object whose ``open(url).read()`` returns the raw feed.
    :param verbose: when true, failures are printed.
    :param api_key: appended to the detail URL as ``?apikey=...`` if
        non-empty.
    '''
    for x in entries:
        try:
            id_url = entry_id(x)[0].text
            title = self.get_title(x)
        except:
            report(verbose)
            # Without id/title the entry cannot be processed. Continuing
            # would hit undefined names on the first entry, or silently
            # reuse the previous entry's values.
            continue
        mi = MetaInformation(title, self.get_authors(x))
        try:
            if api_key != '':
                id_url = id_url + "?apikey=" + api_key
            raw = browser.open(id_url).read()
            feed = etree.fromstring(raw)
            x = entry(feed)[0]
        except Exception as e:
            # Best effort: fall back to the summary entry already in x.
            if verbose:
                print('Failed to get all details for an entry')
                print(e)
        mi.comments = self.get_description(x, verbose)
        mi.tags = self.get_tags(x, verbose)
        mi.isbn = self.get_isbn(x, verbose)
        mi.publisher = self.get_publisher(x, verbose)
        mi.pubdate = self.get_date(x, verbose)
        self.append(mi)
def metadata_from_filename(name, pat=None, fallback_pat=None):
    '''
    Extract metadata from a file name using a regular expression.

    :param name: file name; byte strings are decoded with the filesystem
        encoding. Everything from the last ``.`` onwards is discarded and
        underscores are replaced by spaces before matching.
    :param pat: compiled pattern with optional named groups ``title``,
        ``author``, ``series``, ``series_index``, ``isbn``, ``publisher``,
        ``published`` and ``comments``. Defaults to the pattern stored in
        ``prefs['filename_pattern']``.
    :param fallback_pat: compiled pattern tried when `pat` does not match.
    :return: a ``MetaInformation``; the title falls back to the processed
        name when no title was extracted.
    '''
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    def swap(a):
        # Move the last name part to the front: "Last, First" or
        # "First Last" becomes the reverse order, joined by one space.
        if ',' in a:
            parts = a.split(',', 1)
        else:
            parts = a.split(None, 1)
        if len(parts) > 1:
            t = parts[-1]
            parts = parts[:-1]
            parts.insert(0, t)
        return ' '.join(parts)

    if match is not None:
        # A pattern that lacks a named group raises IndexError from
        # match.group(); each field is therefore handled independently.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: an unparseable date must not abort the other
            # fields. KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata

    The result is stored in ``self.comic_md_in_calibre_format``; nothing is
    done when that attribute is already set.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags
    if co.tags != [] and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        # float() handles ordinary (multi-digit) numbers. unicodedata.numeric
        # only accepts a single character and raises TypeError otherwise, so
        # it is used purely as a fallback for characters such as a vulgar
        # fraction. Unconvertible issues leave series_index untouched.
        try:
            mi.series_index = float(co.issue)
        except (TypeError, ValueError):
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (TypeError, ValueError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # Missing month defaults to June; the day is always the 15th.
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid year/month leaves pubdate unset.
            pass

    # custom columns
    update_column = partial(
        update_custom_column,
        calibre_metadata=mi,
        custom_cols=self.db.field_metadata.custom_field_metadata())

    # artists
    for column_pref, synonyms in (
        ('penciller_column', PENCILLER),
        ('inker_column', INKER),
        ('colorist_column', COLORIST),
        ('letterer_column', LETTERER),
        ('cover_artist_column', COVER_ARTIST),
        ('editor_column', EDITOR),
    ):
        update_column(prefs[column_pref], role(synonyms))

    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['genre_column'], co.genre)
    ensure_int(co.issueCount, update_column, prefs['count_column'], co.issueCount)
    ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
    if prefs['auto_count_pages']:
        update_column(prefs['pages_column'], self.count_pages())
    else:
        update_column(prefs['pages_column'], co.pageCount)
    if prefs['get_image_sizes']:
        update_column(prefs['image_size_column'], self.get_picture_size())
    update_column(prefs['comicvine_column'], '<a href="{}">Comic Vine</a>'.format(co.webLink))
    update_column(prefs['manga_column'], co.manga)

    self.comic_md_in_calibre_format = mi
def _comment_value_(src, key, backref_quotes=False):
    # Return the value of the first KEY="value" found inside an HTML comment
    # in src, or None. With backref_quotes the closing quote must equal the
    # opening one, so the value may contain the other quote character.
    if backref_quotes:
        pat = re.compile(r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key, re.DOTALL)
        group = 2
    else:
        pat = re.compile(r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key, re.DOTALL)
        group = 1
    match = pat.search(src)
    return match.group(group) if match else None


def _meta_value_(src, names):
    # Return group 1 of the first get_meta_regexp_(name) pattern that matches
    # src, trying names in order, or None when none of them matches.
    for name in names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1)
    return None


def get_metadata_(src, encoding=None):
    '''
    Extract metadata from the first 150000 characters of an HTML document.

    Every field is looked up first in an HTML comment of the form
    ``KEY="value"`` and then through the patterns returned by
    ``get_meta_regexp_`` for a list of names. The title additionally falls
    back to the ``<title>`` element.

    :param src: document source; non-unicode input is decoded with
        `encoding`, or via ``xml_to_unicode`` when no encoding is given.
    :param encoding: optional encoding used to decode `src`.
    :return: a ``MetaInformation`` object.
    '''
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    def lookup(key, names, backref_quotes=False):
        # Comment value wins; otherwise fall back to the meta patterns.
        value = _comment_value_(src, key, backref_quotes)
        if value is None:
            value = _meta_value_(src, names)
        return value

    # Title
    title = lookup('TITLE', ('DC.title', 'DCTERMS.title', 'Title'), True)
    if not title:
        match = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE).search(src)
        if match:
            title = match.group(1)

    # Author (commas are converted only for the comment form)
    author = _comment_value_(src, 'AUTHOR', True)
    if author is not None:
        author = author.replace(',', ';')
    else:
        author = _meta_value_(
            src, ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut', 'DC.creator'))

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r'&(\S+)?;')
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup(
        'PUBLISHER', ('Publisher', 'DC.publisher', 'DCTERMS.publisher'), True)
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = lookup('ISBN', ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))
    if isbn:
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = lookup('LANGUAGE', ('DC.language', 'DCTERMS.language'))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup('PUBDATE', (
        'Pubdate', 'Date of publication', 'DC.date.published',
        'DC.date.publication', 'DC.date.issued', 'DCTERMS.issued'))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            pass  # best effort: an unparseable date is ignored

    # TIMESTAMP
    timestamp = lookup('TIMESTAMP', (
        'Timestamp', 'Date of creation', 'DC.date.created',
        'DC.date.creation', 'DCTERMS.created'))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            pass  # best effort: an unparseable date is ignored

    # SERIES
    series = lookup('SERIES', ('Series',))
    if series:
        # A trailing "[n]" inside the series name carries the index.
        match = re.compile(r'\[([.0-9]+)\]').search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except ValueError:
                pass
            series = series.replace(match.group(), '').strip()
        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup('RATING', ('Rating',))
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            # Values above 5 are halved; anything still above 5 is discarded.
            if mi.rating > 5:
                mi.rating /= 2.
            if mi.rating > 5:
                mi.rating = 0
        except Exception:
            pass  # best effort: a malformed rating is ignored

    # COMMENTS
    comments = lookup('COMMENTS', ('Comments',))
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = lookup('TAGS', ('Tags',))
    if tags:
        mi.tags = [
            x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")
        ]

    # Ready to return MetaInformation
    return mi
def get_metadata_(src, encoding=None):
    """
    Extract metadata from the first 150000 characters of an HTML document.

    Every field is looked up first in an HTML comment of the form
    ``KEY="value"`` and then through the patterns returned by
    ``get_meta_regexp_`` for a list of names. The title additionally falls
    back to the ``<title>`` element.

    :param src: document source; non-unicode input is decoded with
        `encoding`, or via ``xml_to_unicode`` when no encoding is given.
    :param encoding: optional encoding used to decode `src`.
    :return: a ``MetaInformation`` object.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    def from_comment(key, same_quote=False):
        # Value of KEY="value" inside an HTML comment, or None. With
        # same_quote the closing quote must equal the opening one.
        if same_quote:
            regex = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key
            group = 2
        else:
            regex = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key
            group = 1
        found = re.compile(regex, re.DOTALL).search(src)
        return found.group(group) if found else None

    def from_meta(*names):
        # Group 1 of the first get_meta_regexp_ pattern that matches, or None.
        for name in names:
            found = get_meta_regexp_(name).search(src)
            if found:
                return found.group(1)
        return None

    def field(key, *names):
        # Comment value wins; otherwise fall back to the meta patterns.
        value = from_comment(key)
        return from_meta(*names) if value is None else value

    # Title
    title = from_comment("TITLE", same_quote=True)
    if title is None:
        title = from_meta("DC.title", "DCTERMS.title", "Title")
    if not title:
        found = re.compile("<title>([^<>]+?)</title>", re.IGNORECASE).search(src)
        if found:
            title = found.group(1)

    # Author (commas are converted only for the comment form)
    author = from_comment("AUTHOR", same_quote=True)
    if author is None:
        author = from_meta("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator")
    else:
        author = author.replace(",", ";")

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r"&(\S+)?;")

    def unescape(text):
        return ent_pat.sub(entity_to_unicode, text)

    if title:
        title = unescape(title)
    if author:
        author = unescape(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = from_comment("PUBLISHER", same_quote=True)
    if publisher is None:
        publisher = from_meta("Publisher", "DC.publisher", "DCTERMS.publisher")
    if publisher:
        mi.publisher = unescape(publisher)

    # ISBN
    isbn = field("ISBN", "ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN")
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = field("LANGUAGE", "DC.language", "DCTERMS.language")
    if language:
        mi.language = language

    # PUBDATE
    pubdate = field(
        "PUBDATE",
        "Pubdate",
        "Date of publication",
        "DC.date.published",
        "DC.date.publication",
        "DC.date.issued",
        "DCTERMS.issued",
    )
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            pass  # best effort: an unparseable date is ignored

    # TIMESTAMP
    timestamp = field(
        "TIMESTAMP",
        "Timestamp",
        "Date of creation",
        "DC.date.created",
        "DC.date.creation",
        "DCTERMS.created",
    )
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            pass  # best effort: an unparseable date is ignored

    # SERIES
    series = field("SERIES", "Series")
    if series:
        # A "[n]" inside the series name carries the index.
        bracketed = re.compile(r"\[([.0-9]+)\]").search(series)
        series_index = None
        if bracketed is not None:
            try:
                series_index = float(bracketed.group(1))
            except ValueError:
                pass
            series = series.replace(bracketed.group(), "").strip()
        mi.series = unescape(series)
        if series_index is None:
            number = from_meta("Seriesnumber")
            if number is not None:
                try:
                    series_index = float(number)
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = field("RATING", "Rating")
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            # Values above 5 are halved; anything still above 5 is discarded.
            if mi.rating > 5:
                mi.rating /= 2.0
            if mi.rating > 5:
                mi.rating = 0
        except Exception:
            pass  # best effort: a malformed rating is ignored

    # COMMENTS
    comments = field("COMMENTS", "Comments")
    if comments:
        mi.comments = unescape(comments)

    # TAGS
    tags = field("TAGS", "Tags")
    if tags:
        mi.tags = [x.strip() for x in unescape(tags).split(",")]

    # Ready to return MetaInformation
    return mi
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata

    The result is stored in ``self.comic_md_in_calibre_format``; nothing is
    done when that attribute is already set.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # synonyms for artists
    WRITER = ['writer', 'plotter', 'scripter']
    PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
    INKER = ['inker', 'artist', 'finishes']
    COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
    LETTERER = ['letterer']
    COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
    EDITOR = ['editor']

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags
    if co.tags != [] and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        # float() handles ordinary (multi-digit) numbers. unicodedata.numeric
        # only accepts a single character and raises TypeError otherwise, so
        # it is used purely as a fallback for characters such as a vulgar
        # fraction. An issue that cannot be converted (e.g. "1a") no longer
        # aborts the whole conversion; series_index is just left untouched.
        try:
            mi.series_index = float(co.issue)
        except (TypeError, ValueError):
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (TypeError, ValueError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # Missing month defaults to June; the day is always the 15th.
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid year/month leaves pubdate unset.
            pass

    # custom columns
    custom_cols = self.db.field_metadata.custom_field_metadata()
    update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)

    # artists
    update_column(prefs['penciller_column'], role(PENCILLER))
    update_column(prefs['inker_column'], role(INKER))
    update_column(prefs['colorist_column'], role(COLORIST))
    update_column(prefs['letterer_column'], role(LETTERER))
    update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
    update_column(prefs['editor_column'], role(EDITOR))

    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['volume_column'], co.volume)
    update_column(prefs['genre_column'], co.genre)

    self.comic_md_in_calibre_format = mi
def get_metadata(stream, extract_cover=True):
    '''
    Read metadata from the ``meta.xml`` member of the zip archive in `stream`.

    Standard fields are looked up through the module-level ``fields``
    mapping; ``user-defined`` elements whose names start with ``opf.`` are
    applied on top when ``opf.metadata`` is set.

    :param stream: file-like object opened with ``ZipFile``.
    :param extract_cover: forwarded unchanged to ``read_cover``.
    :return: a ``MetaInformation`` object.
    '''
    whitespace = re.compile(r'\s+')

    def normalize(s):
        # Collapse every whitespace run to a single space.
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Normalized text of the first element for `field`, else None.
            namespace, tag = fields[field]
            hits = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': namespace})
            if not hits:
                return None
            text = tostring(hits[0], method='text', encoding='unicode', with_tail=False)
            return normalize(text).strip()

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        desc = find('description')
        if desc:
            mi.comments = desc

        lang = find('language')
        if lang:
            canonical = canonicalize_lang(lang)
            if canonical:
                mi.languages = [canonical]

        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [part.strip() for part in kw.split(',') if part.strip()]

        # Collect user-defined entries, keyed by lower-cased name.
        data = {}
        user_ns = {'ns0': fields['user-defined'][0]}
        name_attr = '{%s}name' % METANS
        type_attr = '{%s}value-type' % METANS
        for element in root.xpath('//ns0:user-defined', namespaces=user_ns):
            name = (element.get(name_attr) or '').lower()
            vtype = element.get(type_attr) or 'string'
            val = element.text
            if not (name and val):
                continue
            # Boolean entries are stored as real booleans.
            data[name] = (val == 'true') if vtype == 'boolean' else val

        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True

            titlesort = data.get('opf.titlesort', '')
            if titlesort:
                mi.title_sort = titlesort

            authors = data.get('opf.authors', '')
            if authors:
                mi.authors = string_to_authors(authors)

            authorsort = data.get('opf.authorsort', '')
            if authorsort:
                mi.author_sort = authorsort

            raw_isbn = data.get('opf.isbn', '')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher', '')
            if publisher:
                mi.publisher = publisher

            pubdate = data.get('opf.pubdate', '')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp the rating to the range 0..10.
                    mi.rating = max(0, min(float(rating), 10))
                except Exception:
                    pass

            series = data.get('opf.series', '')
            if series:
                mi.series = series
                seriesindex = data.get('opf.seriesindex', '')
                if seriesindex:
                    try:
                        mi.series_index = float(seriesindex)
                    except Exception:
                        mi.series_index = 1.0

            opf_language = data.get('opf.language', '')
            if opf_language:
                cl = canonicalize_lang(opf_language)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None):
    """Create a new book from part of an existing EPUB and attach the split file.

    A book entry is created first, with metadata copied from ``misource``
    according to the ``copy*`` preferences.  The split EPUB is then written
    to a temporary file and added to that entry as its EPUB format.

    db -- library database used to create and update the new entry.
    source_id -- id of the book being split.
    misource -- metadata of the book being split.
    splitepub -- object whose ``write_split_epub`` produces the output file.
    origlines -- per-line data; ``origlines[n]['toc']`` holds TOC titles.
    newspecs -- ``(linenums, changedtocs, checkedalways)`` tuple.
    deftitle -- title for the new book; derived from the prefs when empty.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return
    self.t = time.time()

    # Work out the default title: TOC entry of the first line, then "<title> Split".
    if not deftitle and prefs['copytoctitle']:
        first_line = linenums[0]
        if first_line in changedtocs:
            deftitle = changedtocs[first_line][0]  # already unicode
        else:
            toc_titles = origlines[first_line]['toc']
            if len(toc_titles) > 0:
                deftitle = unicode(toc_titles[0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title
    defauthors = misource.authors if prefs['copyauthors'] else None

    new_mi = MetaInformation(deftitle, defauthors)

    # (preference key, metadata attribute) pairs copied verbatim when enabled.
    copied_fields = (
        ('copytags', 'tags'),
        ('copylanguages', 'languages'),
        ('copyseries', 'series'),
        ('copydate', 'timestamp'),
        ('copyrating', 'rating'),
        ('copypubdate', 'pubdate'),
        ('copypublisher', 'publisher'),
    )
    for pref_key, field in copied_fields:
        if prefs[pref_key]:
            setattr(new_mi, field, getattr(misource, field))
    if prefs['copyidentifiers']:
        new_mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        new_mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(new_mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    # Copy the configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _action in prefs['custom_cols'].iteritems():
        if col in custom_columns:
            label = custom_columns[col]['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)
    self.t = time.time()

    # Fill the "source" column from the template, if one is configured.
    sourcecol = prefs['sourcecol']
    if sourcecol != '' and sourcecol in custom_columns and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error', misource)
        source_def = custom_columns[sourcecol]
        label = source_def['label']
        if source_def['datatype'] == 'series':
            # Series-type columns get the running book count appended.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=label, commit=False)
    self.book_count += 1
    db.commit()
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])
    self.t = time.time()

    editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
    if prefs['editmetadata']:
        review_msg = _(
            "The book for the new Split EPUB has been created and default metadata filled in. "
            "However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. "
            "You can fill in the metadata yourself, or use download metadata for known books. \n"
            "If you download or add a cover image, it will be included in the generated EPUB.")
        confirm('\n' + review_msg + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again',
                self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

        # Re-read the metadata so edits made in the dialog are picked up.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        # Path to the real cover image, when the new book has one.
        coverjpgpath = (
            os.path.join(db.library_path,
                         db.path(book_id, index_is_id=True),
                         'cover.jpg')
            if mi.has_cover else None)

        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)
        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        created_msg = (_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')
                       % (mi.title, ', '.join(mi.authors)))
        paragraphs = [created_msg,
                      _('EpubSplit now skips the Edit Metadata step by default.'),
                      editconfig_txt]
        confirm('<p>' + '</p><p>'.join(paragraphs) + '</p>',
                'epubsplit_created_now_no_edit_again',
                self.gui)
def _do_split(self, db, source_id, misource, splitepub, newspecs, deftitle=None, editmeta=True):
    """Create a sample book from part of an existing EPUB and attach the file.

    A book entry is created with metadata copied from ``misource`` according
    to the ``copy*`` preferences.  The split EPUB is then written to a
    temporary file and added to that entry as its EPUB format.

    db -- library database used to create and update the new entry.
    source_id -- id of the book being split.
    misource -- metadata of the book being split.
    splitepub -- object whose ``write_split_epub`` produces the output file.
    newspecs -- ``(linenums, changedtocs)`` tuple.
    deftitle -- title for the new book; derived from the prefs when empty.
    editmeta -- accepted for callers but not used: the confirmation dialog
        and the Edit Metadata step are disabled in this variant.
    """
    linenums, changedtocs = newspecs
    self.t = time.time()

    if not deftitle and prefs['copytitle']:
        deftitle = _("نمونه %s") % misource.title
    defauthors = misource.authors if prefs['copyauthors'] else None

    new_mi = MetaInformation(deftitle, defauthors)

    # (preference key, metadata attribute) pairs copied verbatim when enabled.
    copied_fields = (
        ('copytags', 'tags'),
        ('copylanguages', 'languages'),
        ('copyseries', 'series'),
        ('copydate', 'timestamp'),
        ('copyrating', 'rating'),
        ('copypubdate', 'pubdate'),
        ('copypublisher', 'publisher'),
    )
    for pref_key, field in copied_fields:
        if prefs[pref_key]:
            setattr(new_mi, field, getattr(misource, field))
    if prefs['copyidentifiers']:
        new_mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        new_mi.comments = _("Split from:") + "\n\n" + misource.comments

    book_id = db.create_book_entry(new_mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    # Copy the configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _action in prefs['custom_cols'].iteritems():
        if col in custom_columns:
            label = custom_columns[col]['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)
    self.t = time.time()

    # Fill the "source" column from the template, if one is configured.
    sourcecol = prefs['sourcecol']
    if sourcecol != '' and sourcecol in custom_columns and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error', misource)
        label = custom_columns[sourcecol]['label']
        db.set_custom(book_id, val, label=label, commit=False)
    db.commit()
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])
    self.t = time.time()

    self.t = time.time()
    self.gui.tags_view.recount()
    self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

    # Re-read the stored metadata before writing the EPUB.
    mi = db.get_metadata(book_id, index_is_id=True)
    outputepub = PersistentTemporaryFile(suffix='.epub')

    # No cover image path is passed in this variant.
    splitepub.write_split_epub(outputepub,
                               linenums,
                               changedtocs=changedtocs,
                               authoropts=mi.authors,
                               titleopt=mi.title,
                               descopt=mi.comments,
                               tags=mi.tags,
                               languages=mi.languages,
                               coverjpgpath=None)
    self.t = time.time()
    db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)
    self.t = time.time()

    self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
    self.gui.library_view.model().refresh_ids([book_id])
    self.gui.tags_view.recount()
    current = self.gui.library_view.currentIndex()
    self.gui.library_view.model().current_changed(current, self.previous)
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None):
    """Create a new book from part of an existing EPUB and attach the split file.

    A book entry is created first, with metadata copied from ``misource``
    according to the ``copy*`` preferences.  The split EPUB is then written
    to a temporary file and added to that entry as its EPUB format.

    db -- library database used to create and update the new entry.
    source_id -- id of the book being split.
    misource -- metadata of the book being split.
    splitepub -- object whose ``write_split_epub`` produces the output file.
    origlines -- per-line data; ``origlines[n]['toc']`` holds TOC titles.
    newspecs -- ``(linenums, changedtocs, checkedalways)`` tuple.
    deftitle -- title for the new book; derived from the prefs when empty.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return
    self.t = time.time()

    # Work out the default title: TOC entry of the first line, then "<title> Split".
    if not deftitle and prefs['copytoctitle']:
        first_line = linenums[0]
        if first_line in changedtocs:
            deftitle = changedtocs[first_line][0]  # already unicode
        else:
            toc_titles = origlines[first_line]['toc']
            if len(toc_titles) > 0:
                deftitle = unicode(toc_titles[0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title
    defauthors = misource.authors if prefs['copyauthors'] else None

    new_mi = MetaInformation(deftitle, defauthors)

    # (preference key, metadata attribute) pairs copied verbatim when enabled.
    copied_fields = (
        ('copytags', 'tags'),
        ('copylanguages', 'languages'),
        ('copyseries', 'series'),
        ('copydate', 'timestamp'),
        ('copyrating', 'rating'),
        ('copypubdate', 'pubdate'),
        ('copypublisher', 'publisher'),
    )
    for pref_key, field in copied_fields:
        if prefs[pref_key]:
            setattr(new_mi, field, getattr(misource, field))
    if prefs['copyidentifiers']:
        new_mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        new_mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(new_mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    # Copy the configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _action in six.iteritems(prefs['custom_cols']):
        if col in custom_columns:
            label = custom_columns[col]['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)
    self.t = time.time()

    # Fill the "source" column from the template, if one is configured.
    sourcecol = prefs['sourcecol']
    if sourcecol != '' and sourcecol in custom_columns and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error', misource)
        source_def = custom_columns[sourcecol]
        label = source_def['label']
        if source_def['datatype'] == 'series':
            # Series-type columns get the running book count appended.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=label, commit=False)
    self.book_count += 1
    db.commit()
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])
    self.t = time.time()

    editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
    if prefs['editmetadata']:
        review_msg = _(
            "The book for the new Split EPUB has been created and default metadata filled in. "
            "However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. "
            "You can fill in the metadata yourself, or use download metadata for known books. \n"
            "If you download or add a cover image, it will be included in the generated EPUB.")
        confirm('\n' + review_msg + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again',
                self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

        # Re-read the metadata so edits made in the dialog are picked up.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        # Path to the real cover image, when the new book has one.
        coverjpgpath = (
            os.path.join(db.library_path,
                         db.path(book_id, index_is_id=True),
                         'cover.jpg')
            if mi.has_cover else None)

        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)
        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
        if self.gui.cover_flow:
            self.gui.cover_flow.dataChanged()
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        created_msg = (_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')
                       % (mi.title, ', '.join(mi.authors)))
        paragraphs = [created_msg,
                      _('EpubSplit now skips the Edit Metadata step by default.'),
                      editconfig_txt]
        confirm('<p>' + '</p><p>'.join(paragraphs) + '</p>',
                'epubsplit_created_now_no_edit_again',
                self.gui)
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a ``MetaInformation`` object by matching a regex against a file name.

    The extension is removed and underscores become spaces before matching.
    The named groups ``title``, ``author``, ``series``, ``series_index``,
    ``isbn``, ``publisher`` and ``published`` are used when the pattern
    defines them; a group the pattern lacks is simply skipped.

    :param name: file name, as text or as a byte string.
    :param pat: compiled pattern; defaults to the ``filename_pattern`` pref.
    :param fallback_pat: compiled pattern tried when *pat* does not match.
    :return: the populated ``MetaInformation``; the title falls back to the
        cleaned-up file name when no title was found.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    # rpartition returns an empty head when the name has no dot at all, which
    # used to turn an extension-less name into ''.  Strip only a real suffix.
    stem, dot, _ext = name.rpartition(".")
    if dot:
        name = stem
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        # match.group() raises IndexError for a group name the pattern lacks.
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            aus = string_to_authors(match.group("author"))
            if aus:
                mi.authors = aus
                if prefs["swap_author_names"] and mi.authors:

                    def swap(a):
                        # Split once, on the first comma if there is one and
                        # on whitespace otherwise, then put the tail first.
                        if "," in a:
                            parts = a.split(",", 1)
                        else:
                            parts = a.split(None, 1)
                        return " ".join(reversed(parts))

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            # float(None) raises TypeError when the group did not participate.
            mi.series_index = float(match.group("series_index"))
        except (IndexError, ValueError, TypeError):
            pass
        try:
            mi.isbn = match.group("isbn")
        except (IndexError, ValueError):
            pass
        try:
            mi.publisher = match.group("publisher")
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: an unparsable date must not abort the whole read,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
    if mi.is_null("title"):
        mi.title = name
    return mi