def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown text to HTML and collect its embedded metadata.

    The ``meta`` Markdown extension is always enabled so that the metadata
    parsed from ``txt`` is available as ``md.Meta`` after conversion.

    :param txt: Markdown source text.
    :param title: Title given to the Metadata object when truthy; otherwise
        ``'Unknown'`` is used. A non-empty ``title`` entry in the document's
        metadata replaces it.
    :param extensions: Iterable of Markdown extension names. It is never
        modified; a copy with ``'meta'`` added is used when needed.
    :return: A ``(mi, html)`` tuple: the populated Metadata object and
        ``HTML_TEMPLATE`` filled in with ``mi.title`` and the converted body.
    """
    from ebook_converter.ebooks.metadata.book.base import Metadata
    from ebook_converter.utils.date import parse_only_date
    from ebook_converter.db.write import get_series_values

    # Build a new list rather than appending in place: ``extensions`` may be
    # the shared module-level default or a list owned by the caller, and
    # mutating it would leak 'meta' into every later use (or fail outright if
    # it is a tuple).
    if 'meta' not in extensions:
        extensions = list(extensions) + ['meta']
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or 'Unknown')
    m = md.Meta
    # Treat 'date' and 'summary' as aliases of 'pubdate' and 'comments',
    # unless the canonical key is already present.
    for k, v in {'date': 'pubdate', 'summary': 'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                # Single-valued field: keep only the first parsed entry.
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    # Clamp the rating to the integer range 0..10.
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    # Unparseable rating: skip the field.
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    # Unparseable date: skip the field.
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
def field_from_string(field, raw, field_metadata):
    """Convert the string ``raw`` into a value suitable for ``Metadata.set()``.

    The conversion is selected by ``field_metadata['datatype']``:

    * ``int`` / ``float`` -- parsed with the matching builtin.
    * ``rating`` -- parsed as a float and doubled.
    * ``datetime`` -- parsed with ``parse_only_date``.
    * ``bool`` -- ``true/yes/y`` and ``false/no/n`` (case-insensitive).
    * ``text`` -- when ``field_metadata['is_multiple']`` is truthy, split on
      its ``'ui_to_list'`` separator and strip each item; ``identifiers``
      become a dict keyed by the text before the first ``:``, ``languages``
      are canonicalized and empty results dropped.

    Anything else (including single-valued text) returns ``raw`` unchanged.

    :raises ValueError: for an unrecognised boolean string, or when numeric
        parsing fails.
    """
    datatype = field_metadata['datatype']

    if datatype in {'int', 'float'}:
        return int(raw) if datatype == 'int' else float(raw)

    if datatype == 'rating':
        return float(raw) * 2

    if datatype == 'datetime':
        from ebook_converter.utils.date import parse_only_date
        return parse_only_date(raw)

    if datatype == 'bool':
        lowered = raw.lower()
        if lowered in {'true', 'yes', 'y'}:
            return True
        if lowered in {'false', 'no', 'n'}:
            return False
        raise ValueError('Unknown value for %s: %s'%(field, raw))

    if datatype == 'text':
        multiple = field_metadata['is_multiple']
        if multiple:
            items = [part.strip() for part in raw.split(multiple['ui_to_list'])]
            if field == 'identifiers':
                identifiers = {}
                for item in items:
                    scheme, _, value = item.partition(':')
                    identifiers[scheme] = value
                return identifiers
            if field == 'languages':
                from ebook_converter.utils.localization import canonicalize_lang
                # Drop entries that could not be canonicalized.
                return list(filter(None, map(canonicalize_lang, items)))
            return items

    # Unknown datatype or single-valued text: pass the string through.
    return raw
def get_comic_book_info(d, mi, series_index='volume'):
    """Copy ComicBookInfo fields from the dictionary ``d`` onto ``mi``.

    Only fields that are present and non-empty in ``d`` are set; everything
    else on ``mi`` is left untouched.

    :param d: Dictionary of ComicBookInfo values (keys read here: ``series``,
        ``volume``, ``issue``, ``language``, ``rating``, ``title``,
        ``publisher``, ``tags``, ``credits``, ``comments``,
        ``publicationMonth``, ``publicationYear``).
    :param mi: Metadata object that receives the values as attributes.
    :param series_index: Key in ``d`` preferred for the series index
        (``'volume'`` or ``'issue'``); the other one is used as a fallback.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            # Preferred key missing: fall back to the other numbering key.
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1
    # Read the same key that is tested. The previous code tested 'language'
    # but then looked up 'lang', so a present language was never picked up.
    language = d.get('language', None)
    if language:
        lang = canonicalize_lang(language)
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist', 'Creator'):
            x = credit.get('person', '')
            if x:
                # Turn "Last, First" into "First Last".
                x = ' '.join((reversed(x.split(', '))))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from ebook_converter.utils.date import parse_only_date
        from datetime import date
        try:
            # June is used when the month is missing; the day is always 15.
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid year/month leaves pubdate unset.
            pass
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a MetaInformation object by matching a regex against a file name.

    The text after the last ``.`` in ``name`` is dropped and underscores are
    replaced with spaces before matching. The following named groups are
    read from the match when the pattern defines them: ``title``, ``author``,
    ``series``, ``series_index``, ``isbn``, ``publisher``, ``published`` and
    ``comments``. Groups that are missing from the pattern or that hold
    unusable values are skipped.

    :param name: File name as ``str`` or ``bytes`` (bytes are decoded with
        ``filesystem_encoding``, replacing undecodable bytes).
    :param pat: Compiled pattern to search with; defaults to the
        ``filename_pattern`` preference.
    :param fallback_pat: Optional compiled pattern tried when ``pat`` does
        not match.
    :return: The MetaInformation object; its title falls back to the
        processed file name when no title was extracted.
    """
    if isinstance(name, bytes):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define, so every field is looked up independently.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        # Move the trailing part of the name to the front:
                        # "Last, First" or "First Last" -> swapped order.
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from ebook_converter.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Date parsing is best effort, but KeyboardInterrupt and
            # SystemExit must not be swallowed as a bare except would.
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass
    if mi.is_null('title'):
        mi.title = name
    return mi
def _parse_pubdate(root, mi, ctx):
    """Set ``mi.pubdate`` from the ``fb:publish-info/fb:year`` text of ``root``.

    The year is evaluated through the XPath ``number()`` function; when the
    result is not a whole number, ``mi`` is left unchanged.
    """
    year_query = ctx.XPath('number(//fb:publish-info/fb:year/text())')
    year = year_query(root)
    if not float.is_integer(year):
        return
    # Only the year is available, so parse_only_date chooses the month and
    # day. NOTE(review): the original comment says this yields 2nd of June --
    # confirm against parse_only_date.
    mi.pubdate = parse_only_date(str(int(year)))