Python from_base64_bytes示例，polyglot.binary.from_base64_bytes Python示例

示例#1

0

显示文件

def cdb_set_fields(ctx, rd, book_id, library_id):
    '''
    Apply the changes in the request payload to book_id and return the JSON
    form of the affected books.

    The payload must contain a ``changes`` dict. The special keys ``cover``,
    ``added_formats`` and ``removed_formats`` are handled separately; every
    other key is passed to db.set_field(). The result always includes book_id
    and, depending on ``all_dirtied``, either every dirtied book or only those
    dirtied books listed in ``loaded_book_ids``.

    Raises HTTPForbidden for users with per library restrictions and
    HTTPBadRequest for malformed payloads.
    '''
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the set fields interface with a user who has per library restrictions')
    payload = load_payload_data(rd)
    try:
        changes = payload['changes']
        loaded_book_ids = frozenset(map(int, payload.get('loaded_book_ids', ())))
        all_dirtied = bool(payload.get('all_dirtied'))
        if not isinstance(changes, dict):
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest(
        '''Data must be of the form {'changes': {'title': 'New Title', ...}, 'loaded_book_ids':[book_id1, book_id2, ...]'}''')
    dirtied = set()

    # False means "no cover key in the request"; None is passed through to set_cover() as-is
    cover = changes.pop('cover', False)
    if cover is None:
        dirtied |= db.set_cover({book_id: None})
    elif cover is not False:
        try:
            cover = from_base64_bytes(cover.split(',', 1)[-1])
        except Exception:
            raise HTTPBadRequest('Cover data is not valid base64 encoded data')
        try:
            image_type = what(None, cover)
        except Exception:
            image_type = None
        if image_type not in ('jpeg', 'png'):
            raise HTTPBadRequest('Cover data must be either JPEG or PNG')
        dirtied |= db.set_cover({book_id: cover})

    for entry in (changes.pop('added_formats', False) or ()):
        try:
            ext = entry['ext'].upper()
        except Exception:
            raise HTTPBadRequest('Format has no extension')
        if not ext:
            continue
        try:
            fmt_data = from_base64_bytes(entry['data_url'].split(',', 1)[-1])
        except Exception:
            raise HTTPBadRequest('Format data is not valid base64 encoded data')
        if db.add_format(book_id, ext, ReadOnlyFileBuffer(fmt_data)):
            dirtied.add(book_id)

    to_remove = changes.pop('removed_formats', False)
    if to_remove:
        db.remove_formats({book_id: list(to_remove)})
        dirtied.add(book_id)

    # Whatever is left in changes is an ordinary field update
    for name, new_value in iteritems(changes):
        dirtied |= db.set_field(name, {book_id: new_value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))

    if all_dirtied:
        all_ids = dirtied
    else:
        all_ids = dirtied & loaded_book_ids
    all_ids |= {book_id}
    return {bid: book_as_json(db, bid) for bid in all_ids}

示例#2

0

显示文件

文件： data_url.py 项目： j-howell/calibre

 def __call__(self, oeb, opts):
     '''
     Walk every spine item of oeb and, for each <img> whose src is an image
     data: URL, decode the payload and point src at the href produced by
     convert_image_data_uri(). Undecodable or unrecognised images are logged
     and left untouched.
     '''
     self.log = oeb.log
     find_images = XPath('//h:img[@src]')
     for item in oeb.spine:
         root = item.data
         if not hasattr(root, 'xpath'):
             continue
         for img in find_images(root):
             src = img.get('src', '')
             if not src.startswith('data:'):
                 continue
             header, _sep, data = src.partition(',')
             if not (header.startswith('data:image/') and data):
                 continue
             if ';base64' not in header:
                 data = urlunquote(data)
             else:
                 data = re.sub(r'\s+', '', data)
                 from polyglot.binary import from_base64_bytes
                 try:
                     data = from_base64_bytes(data)
                 except Exception:
                     self.log.error('Found invalid base64 encoded data URI, ignoring it')
                     continue
             from imghdr import what
             fmt = what(None, data)
             if not fmt:
                 self.log.warn('Image encoded as data URL has unknown format, ignoring')
                 continue
             new_href = self.convert_image_data_uri(data, fmt, oeb)
             img.set('src', item.relhref(new_href))

示例#3

0

显示文件

文件： json_codec.py 项目： JimmXinu/calibre

def decode_thumbnail(tup):
    '''
    Return a 3-tuple built from the first three items of tup, with the third
    item passed through from_base64_bytes(). Returns None when tup is None.
    '''
    if tup is not None:
        first, second = tup[0], tup[1]
        return (first, second, from_base64_bytes(tup[2]))
    return None

示例#4

0

显示文件

文件： serialize.py 项目： smdx023/calibre

def decode_metadata(x, for_json):
    '''
    Build a metadata object from the dict x via metadata_from_dict().

    When for_json is true and the object carries non-empty cover data, the
    second element of cover_data is replaced by the result of
    from_base64_bytes() on it.
    '''
    from polyglot.binary import from_base64_bytes
    from calibre.ebooks.metadata.book.serialize import metadata_from_dict
    mi = metadata_from_dict(x)
    if not for_json:
        return mi
    if mi.cover_data and mi.cover_data[1]:
        cover_format = mi.cover_data[0]
        mi.cover_data = (cover_format, from_base64_bytes(mi.cover_data[1]))
    return mi

示例#5

0

显示文件

文件： sony_cache.py 项目： JimmXinu/calibre

 def update_booklist(self, bl, bl_index):
     '''
     For every book in bl that has a matching record (looked up by lpath) in
     the record root for bl_index, set book.thumbnail from the first
     decodable jpeg/png element of the record's first thumbnail element, and
     set book.device_collections from the playlist map.
     Does nothing when bl_index has no record root.
     '''
     if bl_index not in self.record_roots:
         return
     debug_print('Updating JSON cache:', bl_index)
     playlist_map = self.build_id_playlist_map(bl_index)
     lpath_map = self.build_lpath_map(self.record_roots[bl_index])
     for book in bl:
         record = lpath_map.get(book.lpath, None)
         if record is None:
             continue
         thumbnails = record.xpath('descendant::*[local-name()="thumbnail"]')
         for thumbnail in thumbnails:
             images = thumbnail.xpath(
                     'descendant::*[local-name()="jpeg"]|'
                     'descendant::*[local-name()="png"]')
             for img in images:
                 if not img.text:
                     continue
                 try:
                     decoded = from_base64_bytes(img.text.strip())
                 except Exception:
                     continue
                 book.thumbnail = decoded
                 break
             # Only the first thumbnail element is ever examined
             break
         book.device_collections = playlist_map.get(book.lpath, [])
     debug_print('Finished updating JSON cache:', bl_index)

示例#6

0

显示文件

def get_stored_annotations(container, bookmark_data):
    '''
    Generate annotation dicts from bookmark_data.

    Data starting with EPUB_FILE_TYPE_MAGIC is treated as base64 encoded
    annotations (newlines are stripped before decoding). Anything else is
    parsed with parse_bookmarks() and every ``cfi`` bookmark with a text
    position is converted to either a ``bookmark`` or a ``last-read``
    annotation. Yields nothing for empty data.
    '''
    if not bookmark_data:
        return
    raw = bookmark_data
    if raw.startswith(EPUB_FILE_TYPE_MAGIC):
        encoded = raw[len(EPUB_FILE_TYPE_MAGIC):].replace(b'\n', b'')
        for annot in parse_annotations(from_base64_bytes(encoded)):
            yield annot
        return

    from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
    for bm in parse_bookmarks(raw):
        if bm['type'] != 'cfi' or not isinstance(bm['pos'], unicode_type):
            continue
        spine_index = (1 + bm['spine']) * 2
        epubcfi = 'epubcfi(/{}/{})'.format(spine_index,
                                           bm['pos'].lstrip('/'))
        title = bm.get('title')
        # The specially named bookmark (and untitled ones) become last-read positions
        if title and title != 'calibre_current_page_bookmark':
            annot = {'type': 'bookmark', 'title': title}
        else:
            annot = {'type': 'last-read'}
        annot['pos'] = epubcfi
        annot['pos_type'] = 'epubcfi'
        annot['timestamp'] = EPOCH
        yield annot

示例#7

0

显示文件

文件： data_url.py 项目： prajoria/calibre

 def __call__(self, oeb, opts):
     '''
     Convert <img> tags whose src is an image data: URL so that src refers
     to the href returned by convert_image_data_uri() instead. Images whose
     payload cannot be decoded, or whose format imghdr cannot identify, are
     logged and skipped.
     '''
     self.log = oeb.log
     img_xpath = XPath('//h:img[@src]')
     for item in oeb.spine:
         root = item.data
         if hasattr(root, 'xpath'):
             for img in img_xpath(root):
                 uri = img.get('src', '')
                 if not uri.startswith('data:'):
                     continue
                 parts = uri.partition(',')
                 header, data = parts[0], parts[2]
                 if not data or not header.startswith('data:image/'):
                     continue
                 if ';base64' in header:
                     data = re.sub(r'\s+', '', data)
                     from polyglot.binary import from_base64_bytes
                     try:
                         data = from_base64_bytes(data)
                     except Exception:
                         self.log.error('Found invalid base64 encoded data URI, ignoring it')
                         continue
                 else:
                     data = urlunquote(data)
                 from imghdr import what
                 fmt = what(None, data)
                 if fmt:
                     img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb)))
                 else:
                     self.log.warn('Image encoded as data URL has unknown format, ignoring')

示例#8

0

显示文件

文件： cdb.py 项目： JimmXinu/calibre

def cdb_set_fields(ctx, rd, book_id, library_id):
    '''
    Apply the changes in the request payload to book_id and return the JSON
    form of the affected books.

    The payload must contain a ``changes`` dict. The ``cover`` key is handled
    specially; every other key is passed to db.set_field(). The result always
    includes book_id and, depending on ``all_dirtied``, either every dirtied
    book or only those dirtied books listed in ``loaded_book_ids``.

    Raises HTTPForbidden for users with per library restrictions and
    HTTPBadRequest for malformed payloads.
    '''
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the set fields interface with a user who has per library restrictions')
    payload = load_payload_data(rd)
    try:
        changes = payload['changes']
        loaded_book_ids = frozenset(map(int, payload.get('loaded_book_ids', ())))
        all_dirtied = bool(payload.get('all_dirtied'))
        if not isinstance(changes, dict):
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest(
        '''Data must be of the form {'changes': {'title': 'New Title', ...}, 'loaded_book_ids':[book_id1, book_id2, ...]'}''')
    dirtied = set()

    # False means "no cover key in the request"; None is passed through to set_cover() as-is
    cover = changes.pop('cover', False)
    if cover is None:
        dirtied |= db.set_cover({book_id: None})
    elif cover is not False:
        try:
            cover = from_base64_bytes(cover.split(',', 1)[-1])
        except Exception:
            raise HTTPBadRequest('Cover data is not valid base64 encoded data')
        try:
            image_type = what(None, cover)
        except Exception:
            image_type = None
        if image_type not in ('jpeg', 'png'):
            raise HTTPBadRequest('Cover data must be either JPEG or PNG')
        dirtied |= db.set_cover({book_id: cover})

    # Whatever is left in changes is an ordinary field update
    for name, new_value in iteritems(changes):
        dirtied |= db.set_field(name, {book_id: new_value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))

    if all_dirtied:
        all_ids = dirtied
    else:
        all_ids = dirtied & loaded_book_ids
    all_ids |= {book_id}
    return {bid: book_as_json(db, bid) for bid in all_ids}

示例#9

0

显示文件

文件： json_codec.py 项目： zyhong/calibre

def decode_thumbnail(tup):
    '''
    Rebuild a thumbnail 3-tuple: the first two items of tup are kept as they
    are and the third is run through from_base64_bytes(). None maps to None.
    '''
    if tup is None:
        return None
    head = (tup[0], tup[1])
    return head + (from_base64_bytes(tup[2]),)

示例#10

0

显示文件

 def update_booklist(self, bl, bl_index):
     '''
     Copy thumbnail data and device collections from the cached records for
     bl_index onto the books in bl. Books are matched to records by lpath;
     only the first thumbnail element of a record is inspected and the first
     jpeg/png child whose text decodes successfully is used.
     Returns immediately when bl_index has no record root.
     '''
     if bl_index not in self.record_roots:
         return
     debug_print('Updating JSON cache:', bl_index)
     playlist_map = self.build_id_playlist_map(bl_index)
     root = self.record_roots[bl_index]
     lpath_map = self.build_lpath_map(root)
     thumbnail_query = 'descendant::*[local-name()="thumbnail"]'
     image_query = ('descendant::*[local-name()="jpeg"]|'
                    'descendant::*[local-name()="png"]')
     for book in bl:
         record = lpath_map.get(book.lpath, None)
         if record is None:
             continue
         for thumbnail in record.xpath(thumbnail_query):
             for img in thumbnail.xpath(image_query):
                 text = img.text
                 if not text:
                     continue
                 try:
                     book_thumbnail = from_base64_bytes(text.strip())
                 except Exception:
                     continue
                 book.thumbnail = book_thumbnail
                 break
             # Stop after the first thumbnail element regardless of outcome
             break
         book.device_collections = playlist_map.get(book.lpath, [])
     debug_print('Finished updating JSON cache:', bl_index)

示例#11

0

显示文件

文件： serialize.py 项目： JimmXinu/calibre

def decode_metadata(x, for_json):
    '''
    Convert the dict x into a metadata object with metadata_from_dict().

    If for_json is true and cover_data has a truthy second element, that
    element is replaced with the output of from_base64_bytes().
    '''
    from polyglot.binary import from_base64_bytes
    from calibre.ebooks.metadata.book.serialize import metadata_from_dict
    obj = metadata_from_dict(x)
    needs_decoding = for_json and obj.cover_data and obj.cover_data[1]
    if needs_decoding:
        fmt = obj.cover_data[0]
        obj.cover_data = (fmt, from_base64_bytes(obj.cover_data[1]))
    return obj

示例#12

0

显示文件

def download_one(tdir, timeout, progress_report, data_uri_map, url):
    '''
    Copy the resource at url into a new temporary file inside tdir.

    Three kinds of URL are handled: ``file:`` URLs are read from the local
    path, ``data:`` URLs are decoded in memory (base64 or plain text), and
    everything else is fetched with urlopen().

    :param tdir: Directory in which the temporary file is created.
    :param timeout: Timeout passed to urlopen() for remote URLs.
    :param progress_report: Callable invoked as ``progress_report(url, 0, size)``
        before copying starts; it is also handed to ProgressTracker.
    :param data_uri_map: Mapping of data: URL payload to the path of the file
        already written for that payload. Updated in place.
    :param url: The URL to download.
    :return: ``(True, (url, filename, path, mime_type))`` on success, or
        ``(False, (url, error_message))`` if any Exception was raised.
    '''
    try:
        purl = urlparse(url)
        data_url_key = None
        with NamedTemporaryFile(dir=tdir, delete=False) as df:
            if purl.scheme == 'file':
                path = unquote(purl.path)
                if iswindows and path.startswith('/'):
                    path = path[1:]
                src = lopen(path, 'rb')
                filename = os.path.basename(path)
                sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1]
            elif purl.scheme == 'data':
                prefix, payload = purl.path.split(',', 1)
                parts = prefix.split(';')
                if parts and parts[-1].lower() == 'base64':
                    payload = re.sub(r'\s+', '', payload)
                    payload = from_base64_bytes(payload)
                else:
                    payload = payload.encode('utf-8')
                # Work out the file name first: the cache-hit return below
                # needs it, and it used to be referenced before assignment
                # there, turning every repeated data: URL into a failure.
                ext = 'unknown'
                for x in parts:
                    # A part such as image/png (no '=' so not a parameter)
                    if '=' not in x and '/' in x:
                        exts = mimetypes.guess_all_extensions(x)
                        if exts:
                            ext = exts[0]
                            break
                filename = 'data-uri.' + ext
                seen_before = data_uri_map.get(payload)
                if seen_before is not None:
                    return True, (url, filename, seen_before,
                                  guess_type(seen_before))
                data_url_key = payload
                src = BytesIO(payload)
                sz = len(payload)
            else:
                src = urlopen(url, timeout=timeout)
                filename = get_filename(purl, src)
                sz = get_content_length(src)
            progress_report(url, 0, sz)
            dest = ProgressTracker(df, url, sz, progress_report)
            with closing(src):
                shutil.copyfileobj(src, dest)
            if data_url_key is not None:
                data_uri_map[data_url_key] = dest.name
            filename = sanitize_file_name(filename)
            mt = guess_type(filename)
            if mt in OEB_DOCS:
                raise ValueError(
                    'The external resource {} looks like a HTML document ({})'.
                    format(url, filename))
            if not mt or mt == 'application/octet-stream' or '.' not in filename:
                raise ValueError(
                    'The external resource {} is not of a known type'.format(
                        url))
            return True, (url, filename, dest.name, mt)
    except Exception as err:
        return False, (url, as_unicode(err))

示例#13

0

显示文件

文件： download.py 项目： JimmXinu/calibre

def download_one(tdir, timeout, progress_report, data_uri_map, url):
    '''
    Copy the resource at url into a new temporary file inside tdir.

    Three kinds of URL are handled: ``file:`` URLs are read from the local
    path, ``data:`` URLs are decoded in memory (base64 or plain text), and
    everything else is fetched with urlopen().

    :param tdir: Directory in which the temporary file is created.
    :param timeout: Timeout passed to urlopen() for remote URLs.
    :param progress_report: Callable invoked as ``progress_report(url, 0, size)``
        before copying starts; it is also handed to ProgressTracker.
    :param data_uri_map: Mapping of data: URL payload to the path of the file
        already written for that payload. Updated in place.
    :param url: The URL to download.
    :return: ``(True, (url, filename, path, mime_type))`` on success, or
        ``(False, (url, error_message))`` if any Exception was raised.
    '''
    try:
        purl = urlparse(url)
        data_url_key = None
        with NamedTemporaryFile(dir=tdir, delete=False) as df:
            if purl.scheme == 'file':
                src = lopen(purl.path, 'rb')
                # basename() needs the path string; it was previously handed
                # the open file object, which raises TypeError.
                filename = os.path.basename(purl.path)
                sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1]
            elif purl.scheme == 'data':
                prefix, payload = purl.path.split(',', 1)
                parts = prefix.split(';')
                if parts and parts[-1].lower() == 'base64':
                    payload = re.sub(r'\s+', '', payload)
                    payload = from_base64_bytes(payload)
                else:
                    payload = payload.encode('utf-8')
                # Work out the file name first: the cache-hit return below
                # needs it, and it used to be referenced before assignment
                # there, turning every repeated data: URL into a failure.
                ext = 'unknown'
                for x in parts:
                    # A part such as image/png (no '=' so not a parameter)
                    if '=' not in x and '/' in x:
                        exts = mimetypes.guess_all_extensions(x)
                        if exts:
                            ext = exts[0]
                            break
                filename = 'data-uri.' + ext
                seen_before = data_uri_map.get(payload)
                if seen_before is not None:
                    return True, (url, filename, seen_before, guess_type(seen_before))
                data_url_key = payload
                src = BytesIO(payload)
                sz = len(payload)
            else:
                src = urlopen(url, timeout=timeout)
                filename = get_filename(purl, src)
                sz = get_content_length(src)
            progress_report(url, 0, sz)
            dest = ProgressTracker(df, url, sz, progress_report)
            with closing(src):
                shutil.copyfileobj(src, dest)
            if data_url_key is not None:
                data_uri_map[data_url_key] = dest.name
            filename = sanitize_file_name(filename)
            mt = guess_type(filename)
            if mt in OEB_DOCS:
                raise ValueError('The external resource {} looks like a HTML document ({})'.format(url, filename))
            if not mt or mt == 'application/octet-stream' or '.' not in filename:
                raise ValueError('The external resource {} is not of a known type'.format(url))
            return True, (url, filename, dest.name, mt)
    except Exception as err:
        return False, (url, as_unicode(err))

示例#14

0

显示文件

文件： web_store.py 项目： qykth-git/calibre

def main(args):
    '''
    Entry point: decode the base64 JSON payload in the last argument, create
    the Application and the Main window, and run the event loop until it
    exits.
    '''
    # Ensure we can continue to function if GUI is closed
    os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None)
    reset_base_dir()
    if iswindows:
        # Ensure that all instances are grouped together in the task bar. This
        # prevents them from being grouped with viewer/editor process when
        # launched from within calibre, as both use calibre-parallel.exe
        set_app_uid(STORE_DIALOG_APP_UID)

    encoded = args[-1]
    data = json.loads(from_base64_bytes(encoded))
    if islinux:
        override = 'calibre-gui'
    else:
        override = None
    app = Application(args, override_program_name=override)
    window = Main(data)
    window.show()
    window.raise_()
    app.exec()
    # Drop the window before the application object
    del window
    del app

示例#15

0

显示文件

文件： __init__.py 项目： prajoria/calibre

def base64_decode(raw):
    '''
    Decode base64 data, falling back to a lenient hand-written decoder when
    from_base64_bytes() raises.

    :param raw: The encoded data. On the fallback path it is passed to
        bytearray(), so it must be something bytearray() accepts.
    :return: Whatever from_base64_bytes() returns, or, on the fallback path,
        the bytes accumulated in a BytesIO buffer.
    '''
    from io import BytesIO
    from polyglot.binary import from_base64_bytes

    # First try the python implementation as it is faster
    try:
        return from_base64_bytes(raw)
    except Exception:
        pass

    # Try a more robust version (adapted from FBReader sources)
    # Integer byte values of the characters that delimit the base64 alphabet
    A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
    raw = bytearray(raw)
    out = BytesIO()
    pos = 0
    while pos < len(raw):
        # Gather up to four accepted characters into one 24-bit group
        tot = 0
        i = 0
        while i < 4 and pos < len(raw):
            byt = raw[pos]
            pos += 1
            num = 0
            if A <= byt <= Z:
                num = byt - A
            elif a <= byt <= z:
                num = byt - a + 26
            elif zero <= byt <= nine:
                num = byt - zero + 52
            else:
                # NOTE(review): '=' maps to 64, which does not fit in six bits
                # and therefore spills into the neighbouring slot when shifted
                # below -- confirm this is the intended handling of padding.
                num = {plus:62, slash:63, equal:64}.get(byt, None)
                if num is None:
                    # Ignore this byte
                    continue
            # The first accepted character lands in the most significant slot
            tot += num << (6 * (3 - i))
            i += 1
        # Split the accumulator into three bytes, most significant first.
        # Three bytes are written for every pass of the outer loop, including
        # a final group with fewer than four accepted characters.
        triple = bytearray(3)
        for j in (2, 1, 0):
            triple[j] = tot & 0xff
            tot >>= 8
        out.write(bytes(triple))
    return out.getvalue()

示例#16

0

显示文件

文件： download_thread.py 项目： onyx-Sean/calibre

def decode_data_url(url):
    '''Pass everything after the first comma in url to from_base64_bytes() and return the result.'''
    _header, _comma, encoded = url.partition(',')
    return from_base64_bytes(encoded)

示例#17

0

显示文件

文件： simple.py 项目： zyhong/calibre

 def process_images(self, soup, baseurl):
     '''
     Save every <img src=...> in soup to the ``images`` directory under
     self.current_dir and rewrite the tag's src to the saved file's path.

     :param soup: Parsed document; must provide findAll().
     :param baseurl: URL used to resolve image URLs that have no scheme.
     '''
     diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(diskpath):
         os.mkdir(diskpath)
     # Counter used to build the saved file names (img1, img2, ...)
     c = 0
     for tag in soup.findAll('img', src=True):
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             # Embedded image: everything after the first comma is decoded as base64
             try:
                 data = from_base64_bytes(iurl.partition(',')[-1])
             except Exception:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urlsplit(iurl).scheme:
                 iurl = urljoin(baseurl, iurl, False)
             # Reuse the saved file if this URL has already been processed
             with self.imagemap_lock:
                 if iurl in self.imagemap:
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
                 if data == b'GIF89a\x01':
                     # Skip empty GIF files as PIL errors on them anyway
                     continue
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
         c += 1
         fname = ascii_filename('img' + unicode_type(c))
         # Optional hook; a None result means the image is to be skipped
         data = self.preprocess_image_ext(
             data, iurl) if self.preprocess_image_ext is not None else data
         if data is None:
             continue
         itype = what(None, data)
         if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
             # SVG image
             imgpath = os.path.join(diskpath, fname + '.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as x:
                 x.write(data)
             tag['src'] = imgpath
         else:
             try:
                 # Ensure image is valid
                 img = image_from_data(data)
                 # Anything that is not already PNG/JPEG is re-encoded:
                 # GIF becomes PNG, every other type becomes JPEG
                 if itype not in {'png', 'jpg', 'jpeg'}:
                     itype = 'png' if itype == 'gif' else 'jpeg'
                     data = image_to_data(img, fmt=itype)
                 if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                     try:
                         data = self.rescale_image(data)
                     except Exception:
                         self.log.exception('failed to compress image ' +
                                            iurl)
                 # Moon+ apparently cannot handle .jpeg files
                 if itype == 'jpeg':
                     itype = 'jpg'
                 imgpath = os.path.join(diskpath, fname + '.' + itype)
                 with self.imagemap_lock:
                     self.imagemap[iurl] = imgpath
                 with open(imgpath, 'wb') as x:
                     x.write(data)
                 tag['src'] = imgpath
             except Exception:
                 # Invalid or unwritable image: report it and leave the tag unchanged
                 traceback.print_exc()
                 continue

示例#18

0

显示文件

文件： kfx.py 项目： jimman2003/calibre

def read_metadata_kfx(stream, read_cover=True):
    '''
    Read the metadata.kfx file that is found in the sdr book folder for KFX
    files and return it as a Metadata object.

    :param stream: File-like object whose read() returns the file contents.
    :param read_cover: When true, the embedded cover (if it can be decoded and
        identified) is stored in ``cover_data``.
    '''
    container = Container(stream.read())
    m = extract_metadata(container.decode())

    # dump_metadata(m)

    def has(key):
        # Truthy only when the key has values and the first one is truthy
        return m[key] and m[key][0]

    def get(key, single=True):
        values = m[key]
        if not single:
            return [clean_xml_chars(v) for v in values]
        return clean_xml_chars(values[0]) if values else ''

    title = get('title') or _('Unknown')
    authors = get('author', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # Turn "Last, First" into "First Last" unless the tweak says to copy
        if tweaks['author_sort_copy_method'] == 'copy':
            return name
        match = auth_pat.match(name.strip())
        if match is None:
            return name
        return match.group(2) + ' ' + match.group(1)

    unique_authors = []  # remove duplicates while retaining order
    for author in [fix_author(a) for a in authors]:
        if author in unique_authors:
            continue
        unique_authors.append(author)

    mi = Metadata(title, unique_authors)
    if has('author'):
        mi.author_sort = get('author')
    # ASIN takes precedence over content_id as the identifier source
    for id_key in ('ASIN', 'content_id'):
        if has(id_key):
            mi.set_identifier('mobi-asin', get(id_key))
            break
    if has('languages'):
        langs = [
            lang for lang in (canonicalize_lang(x)
                              for x in get('languages', False)) if lang
        ]
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher
    if read_cover and m[COVER_KEY]:
        try:
            data = from_base64_bytes(m[COVER_KEY])
            fmt, w, h = identify(data)
        except Exception:
            # Undecodable or unidentifiable cover: leave cover_data unset
            pass
        else:
            if fmt and w > -1 and h > -1:
                mi.cover_data = (fmt, data)

    return mi

示例#19

0

显示文件

文件： download_thread.py 项目： JimmXinu/calibre

def decode_data_url(url):
    '''Return from_base64_bytes() applied to the part of url after its first comma.'''
    payload = url.partition(',')[-1]
    return from_base64_bytes(payload)

示例#20

0

显示文件

文件： simple.py 项目： JimmXinu/calibre

 def process_images(self, soup, baseurl):
     '''
     Save every <img src=...> in soup to the ``images`` directory under
     self.current_dir and rewrite the tag's src to the saved file's path.

     :param soup: Parsed document; must provide findAll().
     :param baseurl: URL used to resolve image URLs that have no scheme.
     '''
     diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(diskpath):
         os.mkdir(diskpath)
     # Counter used to build the saved file names (img1, img2, ...)
     c = 0
     for tag in soup.findAll('img', src=True):
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             # Embedded image: everything after the first comma is decoded as base64
             try:
                 data = from_base64_bytes(iurl.partition(',')[-1])
             except Exception:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urlsplit(iurl).scheme:
                 iurl = urljoin(baseurl, iurl, False)
             # Reuse the saved file if this URL has already been processed
             with self.imagemap_lock:
                 if iurl in self.imagemap:
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
                 # The data is handled as bytes below (written in 'wb' mode,
                 # searched for b'<svg'), so the comparison needs a bytes
                 # literal; a str literal never compares equal on Python 3.
                 if data == b'GIF89a\x01':
                     # Skip empty GIF files as PIL errors on them anyway
                     continue
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
         c += 1
         fname = ascii_filename('img'+str(c))
         # Optional hook; a None result means the image is to be skipped
         data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
         if data is None:
             continue
         itype = what(None, data)
         if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
             # SVG image
             imgpath = os.path.join(diskpath, fname+'.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as x:
                 x.write(data)
             tag['src'] = imgpath
         else:
             try:
                 # Ensure image is valid
                 img = image_from_data(data)
                 # Anything that is not already PNG/JPEG is re-encoded:
                 # GIF becomes PNG, every other type becomes JPEG
                 if itype not in {'png', 'jpg', 'jpeg'}:
                     itype = 'png' if itype == 'gif' else 'jpeg'
                     data = image_to_data(img, fmt=itype)
                 if self.compress_news_images and itype in {'jpg','jpeg'}:
                     try:
                         data = self.rescale_image(data)
                     except Exception:
                         self.log.exception('failed to compress image '+iurl)
                 # Moon+ apparently cannot handle .jpeg files
                 if itype == 'jpeg':
                     itype = 'jpg'
                 imgpath = os.path.join(diskpath, fname+'.'+itype)
                 with self.imagemap_lock:
                     self.imagemap[iurl] = imgpath
                 with open(imgpath, 'wb') as x:
                     x.write(data)
                 tag['src'] = imgpath
             except Exception:
                 # Invalid or unwritable image: report it and leave the tag unchanged
                 traceback.print_exc()
                 continue

示例#21

0

显示文件

文件： kfx.py 项目： j-howell/calibre

def read_metadata_kfx(stream, read_cover=True):
    '''
    Read the metadata.kfx file that is found in the sdr book folder for KFX
    files and return it as a Metadata object.

    :param stream: File-like object whose read() returns the file contents.
    :param read_cover: When true, the embedded cover (if it can be decoded and
        identified) is stored in ``cover_data``.
    '''
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(field):
        # Truthy only when the field has values and the first one is truthy
        return m[field] and m[field][0]

    def get(field, single=True):
        values = m[field]
        if single:
            if values:
                return clean_xml_chars(values[0])
            return ''
        return [clean_xml_chars(v) for v in values]

    title = get('title') or _('Unknown')
    authors = get('author', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(author):
        # Turn "Last, First" into "First Last" unless the tweak says to copy
        if tweaks['author_sort_copy_method'] != 'copy':
            found = auth_pat.match(author.strip())
            if found is not None:
                return '{} {}'.format(found.group(2), found.group(1))
        return author

    fixed_authors = [fix_author(a) for a in authors]
    unique_authors = []     # remove duplicates while retaining order
    for candidate in fixed_authors:
        if candidate not in unique_authors:
            unique_authors.append(candidate)

    mi = Metadata(title, unique_authors)
    if has('author'):
        mi.author_sort = get('author')
    # ASIN takes precedence over content_id as the identifier source
    if has('ASIN'):
        asin_field = 'ASIN'
    elif has('content_id'):
        asin_field = 'content_id'
    else:
        asin_field = None
    if asin_field is not None:
        mi.set_identifier('mobi-asin', get(asin_field))
    if has('languages'):
        canonical = (canonicalize_lang(x) for x in get('languages', False))
        langs = [lang for lang in canonical if lang]
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher
    if read_cover and m[COVER_KEY]:
        try:
            data = from_base64_bytes(m[COVER_KEY])
            fmt, w, h = identify(data)
        except Exception:
            # Undecodable or unidentifiable cover: leave cover_data unset
            fmt = None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi