Exemplo n.º 1
0
def change_font_in_declaration(style, old_name, new_name=None):
    """Rename (or drop) a font family inside a CSS style declaration.

    Both the ``font-family`` property and the ``font`` shorthand are
    inspected. Every family equal to ``old_name`` is replaced by
    ``new_name``; falsy entries (for example when ``new_name`` is None) are
    dropped. A property whose family list becomes empty is removed from
    ``style``.

    :return: True if either property was modified or removed.
    """
    def substituted(families):
        # Swap the old name for the new one, then discard falsy entries
        renamed = (new_name if family == old_name else family for family in families)
        return [family for family in renamed if family]

    modified = False

    family_prop = style.getProperty('font-family')
    if family_prop is not None:
        current = parse_font_family(css_text(family_prop.propertyValue))
        updated = substituted(current)
        if current != updated:
            modified = True
            if updated:
                family_prop.propertyValue.cssText = serialize_font_family(updated)
            else:
                style.removeProperty(family_prop.name)

    shorthand = style.getProperty('font')
    if shorthand is not None:
        parsed = parse_font(css_text(shorthand.propertyValue))
        current = parsed.get('font-family') or []
        updated = substituted(current)
        if current != updated:
            modified = True
            parsed['font-family'] = updated
            if updated:
                shorthand.propertyValue.cssText = serialize_font(parsed)
            else:
                style.removeProperty(shorthand.name)
    return modified
Exemplo n.º 2
0
def random_user_agent(choose=None, allow_ie=True):
    """Pick a non-mobile user agent string from the common user agent list.

    :param choose: Index into the filtered list; when None a random entry
        is returned instead.
    :param allow_ie: When False, agents containing ``Trident/`` or
        ``Edge/`` are excluded as well.
    """
    from calibre.utils.random_ua import common_user_agents
    candidates = [ua for ua in common_user_agents() if not is_mobile_ua(ua)]
    if not allow_ie:
        candidates = [ua for ua in candidates if not ('Trident/' in ua or 'Edge/' in ua)]
    if choose is None:
        return random.choice(candidates)
    return candidates[choose]
Exemplo n.º 3
0
 def __init__(self, libraries, opts, testing=False, notify_changes=None):
     """Store the server options and set up the library broker and user manager.

     ``libraries`` is used as-is when it already is a LibraryBroker,
     otherwise it is wrapped in one. ``opts.ignored_fields`` and
     ``opts.displayed_fields`` are comma separated strings (or falsy) that
     are turned into frozensets of stripped, non-empty names.
     """
     def field_names(raw):
         # Split on commas, strip whitespace and drop blank entries
         stripped = (name.strip() for name in (raw or '').split(','))
         return frozenset(name for name in stripped if name)

     self.opts = opts
     if isinstance(libraries, LibraryBroker):
         self.library_broker = libraries
     else:
         self.library_broker = LibraryBroker(libraries)
     self.testing = testing
     self.lock = Lock()
     self.user_manager = UserManager(opts.userdb)
     self.ignored_fields = field_names(opts.ignored_fields)
     self.displayed_fields = field_names(opts.displayed_fields)
     self._notify_changes = notify_changes
Exemplo n.º 4
0
def render_categories(opts, db, category_data):
    """Build the category tree and its item map for the given category data.

    The tree is created while holding the database read lock. Afterwards
    the counts of the nodes flagged for recounting are recomputed, and
    hidden and/or empty top-level categories are removed according to
    ``opts``.

    :return: dict with the keys ``root`` (the tree) and ``item_map``.
    """
    item_map = {}
    with db.safe_read_lock:
        root, node_id_map, category_nodes, recount_nodes = create_toplevel_tree(
            category_data, item_map, db.field_metadata, opts)
        fillout_tree(
            root, item_map, node_id_map, category_nodes, category_data,
            db.field_metadata, opts, db.fields['rating'].book_value_map)

    for node in recount_nodes:
        entry = item_map[node['id']]
        leaf_count = 0
        # Only descendants that are not themselves categories are counted
        for descendant in iternode_descendants(node):
            if not item_map[descendant['id']].get('is_category', False):
                leaf_count += 1
        entry['count'] = leaf_count

    if opts.hidden_categories:
        # Hidden categories can only be removed once all processing is done,
        # as items from a hidden category could be in a user category
        root['children'] = [
            child for child in root['children']
            if item_map[child['id']]['category'] not in opts.hidden_categories]
    if opts.hide_empty_categories:
        root['children'] = [
            child for child in root['children']
            if item_map[child['id']]['count'] > 0]
    return {'root': root, 'item_map': item_map}
Exemplo n.º 5
0
    def extract_images(self, picts):
        """Extract the hex encoded pictures stored in the file ``picts``.

        Every ``{\\pict ...}`` group found in the file is stripped of all
        non-hexadecimal characters, decoded to binary and written to a file
        named ``NNNN.<fmt>`` in the current directory, where ``<fmt>`` is the
        detected image type, or ``wmf`` when the type is not recognized.

        :param picts: Path of the file containing the picture groups.
        :return: The result of ``self.convert_images()`` called with a map
            of image number to the file name that was written.
        """
        from binascii import unhexlify
        from calibre.utils.imghdr import what
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        # The file is read as bytes, so the patterns have to be bytes as
        # well, otherwise matching fails on Python 3
        pict_groups = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw))
        non_hex = re.compile(br'[^a-fA-F0-9]')
        encs = [non_hex.sub(b'', pict) for pict in pict_groups]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # Hex data needs an even number of digits, drop the stray one
                enc = enc[:-1]
            data = unhexlify(enc)
            fmt = what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
            name = u'%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)
Exemplo n.º 6
0
def polish_index_markup(index, blocks):
    """Tag the index blocks, split nested entries and merge blocks that
    share the same main entry.

    Each block gets the ``index-entry`` class. The text of its first
    descendant ``<a>`` is split on ``:`` into a path; blocks without a
    ``:`` in that text have the link moved into a block level ``<span>``.
    Finally consecutive blocks whose paths start with the same component
    are merged into the first of them.
    """
    # Blocks are in reverse order at this point
    entry_paths = {}
    levels = {}
    for block in blocks:
        existing_class = block.get('class', '') or ''
        block.set('class', (existing_class + ' index-entry').lstrip())
        anchors = block.xpath('descendant::a[1]')
        if anchors:
            text = etree.tostring(anchors[0], method='text', with_tail=False, encoding=unicode_type).strip()
        else:
            text = ''
        if ':' not in text:
            # try using a span all the time
            entry_paths[block] = [text]
            # NOTE(review): a block without any <a> raises IndexError here
            anchor = anchors[0]
            parent = anchor.getparent()
            span = parent.makeelement('span', style='display:block; margin-left: 0em')
            parent.append(span)
            span.append(anchor)
            levels[span] = 0
        else:
            parts = [piece for piece in (x.strip() for x in text.split(':')) if piece]
            entry_paths[block] = parts
            if len(parts) > 1:
                split_up_block(block, anchors[0], text, parts, levels)

        for br in block.xpath('descendant::br'):
            br.tail = None

    # We want a single block for each main entry
    main_block = blocks[0]
    for block in blocks[1:]:
        main_path, path = entry_paths[main_block], entry_paths[block]
        if main_path[0] != path[0]:
            main_block = block
            continue
        merge_blocks(main_block, block, 0, 0, path, levels)
Exemplo n.º 7
0
    def __call__(self, query, limit=None):
        """Score all items against ``query`` and return the matches.

        One request per scorer is queued on the first worker and exactly
        ``len(self.task_maps)`` results are collected while holding the
        module level lock.

        :param limit: When not None, only the first ``limit`` entries of the
            ranking are considered.
        :return: OrderedDict mapping item to its match positions, ordered by
            descending score. Items with a zero score are left out.
        :raises Exception: If any worker reported a failure.
        """
        query = normalize('NFC', unicode_type(query))
        scores = {}
        positions = {}
        failure = None
        with wlock:
            for task_index, scorer in enumerate(self.scorers):
                workers[0].requests.put((task_index, scorer, query))
            if self.sort_keys is None:
                self.sort_keys = {
                    idx: primary_sort_key(item)
                    for idx, item in enumerate(self.items)
                }
            for _pending in range(len(self.task_maps)):
                ok, payload = workers[0].results.get()
                if not ok:
                    failure = payload
                    continue
                task_num, vals = payload
                task_map = self.task_maps[task_num]
                for offset, (score, pos) in enumerate(vals):
                    # Translate the position inside the task to the item index
                    item_index = task_map[offset]
                    scores[item_index] = score
                    positions[item_index] = pos

        if failure is not None:
            raise Exception('Failed to score items: %s' % failure)
        # Negated scores make the ascending sort yield the best match first
        ranked = [(-scores[idx], item, positions[idx])
                  for idx, item in enumerate(self.items)]
        ranked.sort(key=itemgetter(0))
        if limit is not None:
            del ranked[limit:]
        return OrderedDict(
            (item, pos) for neg_score, item, pos in ranked if neg_score)
Exemplo n.º 8
0
def parse_uri(uri, parse_query=True):
    """Split a Request-URI into its scheme, path components and query.

    :param uri: The raw (bytes) Request-URI.
    :param parse_query: When False the query string is not parsed and None
        is returned in its place.
    :return: ``(scheme, path, query)`` where ``path`` is a tuple of the
        decoded, non-empty path components.
    :raises HTTPSimpleResponse: With BAD_REQUEST when the path is missing,
        contains a fragment, or any part cannot be decoded or parsed.
    """
    scheme, authority, raw_path = parse_request_uri(uri)
    if raw_path is None:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "No path component")
    if b'#' in raw_path:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "Illegal #fragment in Request-URI.")

    if scheme:
        try:
            scheme = scheme.decode('ascii')
        except ValueError:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Un-decodeable scheme')

    raw_path, _sep, query_string = raw_path.partition(b'?')
    query = None
    if parse_query:
        try:
            query = MultiDict.create_from_query_string(query_string)
        except Exception:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Unparseable query string')

    try:
        decoded = [unquote(chunk).decode('utf-8') for chunk in quoted_slash.split(raw_path)]
    except ValueError as err:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, as_unicode(err))
    # The pieces are re-joined with a literal %2F so that splitting on '/'
    # below leaves those slashes inside their component
    joined = '%2F'.join(decoded)
    components = (segment.replace('%2F', '/') for segment in joined.split('/'))
    path = tuple(component for component in components if component)

    return scheme, path, query
Exemplo n.º 9
0
    def process_dir(self, dirpath, filenames, book_id):
        """Record the book found in ``dirpath`` and its author links.

        The metadata is read from ``metadata.opf`` inside ``dirpath``. When
        its application id equals ``book_id`` the book is appended to
        ``self.books``, otherwise ``dirpath`` is appended to
        ``self.mismatched_dirs``. In both cases the author links of the
        book are merged into ``self.authors_links``.
        """
        book_id = int(book_id)
        ebook_files = [fname for fname in filenames if self.is_ebook_file(fname)]
        # One (FORMAT, size, name without extension) tuple per ebook file
        format_records = [
            (
                os.path.splitext(fname)[1][1:].upper(),
                os.path.getsize(os.path.join(dirpath, fname)),
                os.path.splitext(fname)[0],
            )
            for fname in ebook_files
        ]
        opf_path = os.path.join(dirpath, 'metadata.opf')
        mi = OPF(opf_path, basedir=dirpath).to_book_metadata()
        opf_mtime = os.path.getmtime(opf_path)
        rel_path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, '/')

        if int(mi.application_id) != book_id:
            self.mismatched_dirs.append(dirpath)
        else:
            self.books.append({
                'mi': mi,
                'timestamp': opf_mtime,
                'formats': format_records,
                'id': book_id,
                'dirpath': dirpath,
                'path': rel_path,
            })

        for author, link in iteritems(mi.get('author_link_map', {})):
            known_link, known_timestamp = self.authors_links.get(author, (None, None))
            # Keep the link from the most recently timestamped book
            if known_link is None or (known_link != link and known_timestamp < mi.timestamp):
                self.authors_links[author] = (link, mi.timestamp)
Exemplo n.º 10
0
    def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
        """Return the names of the table style overrides that apply to a cell.

        :param r: Zero based row index.
        :param c: Zero based column index, or None to skip all column and
            corner cell overrides.
        :return: Tuple of override names, in the order they were collected,
            restricted to those accepted by ``self.override_allowed``.
        """
        def band_index(position, band_size):
            return (position - (position % band_size)) // band_size

        has_column = c is not None
        candidates = ['wholeTable']

        if has_column:
            vert_band = 1 if band_index(c, self.table_style.col_band_size) % 2 == 1 else 2
            candidates.append('band%dVert' % vert_band)
        horz_band = 1 if band_index(r, self.table_style.row_band_size) % 2 == 1 else 2
        candidates.append('band%dHorz' % horz_band)

        # According to the OOXML spec columns should have higher override
        # priority than rows, but Word seems to do it the other way around.
        if has_column:
            if c == 0:
                candidates.append('firstCol')
            if c >= num_of_cols_in_row - 1:
                candidates.append('lastCol')
        if r == 0:
            candidates.append('firstRow')
        if r >= num_of_rows - 1:
            candidates.append('lastRow')

        if has_column:
            corners = (
                (0, 'nwCell', 'neCell'),
                (num_of_rows - 1, 'swCell', 'seCell'),
            )
            for row_index, west, east in corners:
                if r != row_index:
                    continue
                if c == 0:
                    candidates.append(west)
                if c == num_of_cols_in_row - 1:
                    candidates.append(east)
        return tuple(name for name in candidates if self.override_allowed(name))
Exemplo n.º 11
0
def find_identical_books(mi, data):
    """Find the ids of the books that look identical to ``mi``.

    A book matches when it has all the authors of ``mi``, its fuzzy title
    equals the fuzzy title of ``mi`` and, if ``mi`` has any defined
    languages, the book either has no languages or exactly the same ones.

    :param mi: Metadata object with ``authors``, ``title`` and ``languages``.
    :param data: Tuple ``(author_map, aid_map, title_map, lang_map)``.
    :return: Set of matching book ids. It is empty when ``mi`` has no
        authors, since there is then nothing to match on.
    """
    author_map, aid_map, title_map, lang_map = data
    found_books = None
    for a in mi.authors or ():
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            return set()
        books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())}
        if found_books is None:
            found_books = books_by_author
        else:
            found_books &= books_by_author
        if not found_books:
            return set()

    if not found_books:
        # No authors were supplied, so found_books is still None; iterating
        # it below would raise a TypeError
        return set()

    titleq = fuzzy_title(mi.title)
    ans = {
        book_id for book_id in found_books
        if fuzzy_title(title_map.get(book_id, '')) == titleq
    }

    # The undefined language ('und') is ignored when comparing languages
    langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        book_langq = lang_map.get(book_id)
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}
Exemplo n.º 12
0
def multiple_sequences(expr, root):
    """Collect the values of all sequence elements matching ``expr``.

    The returned list contains distinct, non-null values in the order in
    which they were first encountered.
    """
    collected = []
    for element in XPath(expr)(root):
        collected.extend(read_sequence(element))
    return [value for value in uniq(collected) if value]
Exemplo n.º 13
0
    def tokenize(self):
        """Tokenize the input file in place.

        Reads the file, applies the basic substitutions and splitting done
        by ``__sub_reg_split``, runs every token through
        ``__unicode_process`` and writes the non-empty tokens, one per
        line, to the temporary output file, which then replaces the input
        file.
        """
        with open_for_read(self.__file) as source:
            raw_text = source.read()

        # Simple replacements and splitting; '' and \n are removed there
        pieces = self.__sub_reg_split(raw_text)
        # Correct unicode, then drop the empty items created by removing \uc
        processed = (self.__unicode_process(piece) for piece in pieces)
        tokens = [token for token in processed if len(token) > 0]

        with open_for_write(self.__write_to) as sink:
            sink.write('\n'.join(tokens))

        # Move and copy
        copier = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copier.copy_file(self.__write_to, "tokenize.data")
        copier.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
Exemplo n.º 14
0
def create_themeball(report, progress=None, abort=None):
    """Optimize the images of a theme and pack them into a theme archive.

    All files in ``report.name_map`` are optimized in a thread pool (PNG
    and JPEG only), stored uncompressed in a zip file which is then
    compressed as a whole, and finally wrapped in an outer zip together
    with the theme metadata and cover.

    :param report: Object providing ``path``, ``name`` and ``name_map``.
    :param progress: Optional callable ``progress(num, message)``.
    :param abort: Optional event-like object; when set the work is
        abandoned at the next checkpoint.
    :return: ``(raw_bytes, prefix)``, or ``(None, None)`` if aborted.
    :raises: The first exception raised while optimizing an image.
    """
    pool = ThreadPool(processes=cpu_count())
    buf = BytesIO()
    num = count()
    error_occurred = Event()

    def optimize(name):
        if abort is not None and abort.is_set():
            return
        if error_occurred.is_set():
            return
        try:
            i = next(num)
            if progress is not None:
                progress(i, _('Optimizing %s') % name)
            srcpath = os.path.join(report.path, name)
            ext = srcpath.rpartition('.')[-1].lower()
            if ext == 'png':
                optimize_png(srcpath)
            elif ext in ('jpg', 'jpeg'):
                optimize_jpeg(srcpath)
        except Exception:
            # Tell the remaining tasks to stop, the error is re-raised below
            error_occurred.set()
            return sys.exc_info()

    errors = tuple(filter(None, pool.map(optimize, tuple(report.name_map))))
    pool.close()
    pool.join()
    if abort is not None and abort.is_set():
        # Same shape as the other abort returns, so callers can unpack it
        return None, None
    if errors:
        e = errors[0]
        reraise(*e)

    if progress is not None:
        progress(next(num), _('Creating theme file'))
    with ZipFile(buf, 'w') as zf:
        for name in report.name_map:
            srcpath = os.path.join(report.path, name)
            with lopen(srcpath, 'rb') as f:
                # Stored uncompressed, the whole archive is compressed below
                zf.writestr(name, f.read(), compression=ZIP_STORED)
    buf.seek(0)
    out = BytesIO()
    if abort is not None and abort.is_set():
        return None, None
    if progress is not None:
        progress(next(num), _('Compressing theme file'))
    compress(buf, out, level=9)
    buf = BytesIO()
    prefix = report.name
    if abort is not None and abort.is_set():
        return None, None
    with ZipFile(buf, 'w') as zf:
        with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f:
            zf.writestr(prefix + '/' + THEME_METADATA, f.read())
        zf.writestr(prefix + '/' + THEME_COVER, create_cover(report))
        zf.writestr(prefix + '/' + 'icons.zip.xz', out.getvalue(), compression=ZIP_STORED)
    if progress is not None:
        progress(next(num), _('Finished'))
    return buf.getvalue(), prefix
Exemplo n.º 15
0
def ACQUISITION_ENTRY(book_id, updated, request_context):
    """Build the feed ``entry`` element describing a single book.

    The entry carries the title, authors, uuid based id, last modified and
    published timestamps, optionally the publication date, an XHTML content
    block summarizing rating, tags, series, displayable custom fields and
    comments, one acquisition link per format with a known mime type, and
    links to the cover and thumbnail.

    :param book_id: Id of the book in ``request_context.db``.
    :param updated: Not used by this function; the entry's updated value is
        taken from ``mi.last_modified``.
    :param request_context: Provides ``db``, ``ctx`` and ``library_id``.
    :return: The entry element.
    """
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    # HTML fragments that are joined into the content of the entry
    extra = []
    if mi.rating > 0:
        rating = rating_to_stars(mi.rating)
        extra.append(_('RATING: %s<br />')%rating)
    if mi.tags:
        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(_('SERIES: %(series)s [%(sidx)s]<br />')%
                dict(series=xml(mi.series),
                sidx=fmt_sidx(float(mi.series_index))))
    # Only fields the server context allows to be displayed are included
    for key in filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm['datatype']
            if datatype == 'text' and fm['is_multiple']:
                extra.append('%s: %s<br />'%
                             (xml(name),
                              xml(format_tag_string(val,
                                    fm['is_multiple']['ui_to_list'],
                                    joinval=fm['is_multiple']['list_to_ui']))))
            elif datatype == 'comments' or (fm['datatype'] == 'composite' and fm['display'].get('contains_html', False)):
                # The value is HTML already, so it is not escaped with xml()
                extra.append('%s: %s<br />'%(xml(name), comments_to_html(unicode_type(val))))
            else:
                extra.append('%s: %s<br />'%(xml(name), xml(unicode_type(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        # From here on extra is whatever html_to_lxml() returns, not a list
        extra = html_to_lxml('\n'.join(extra))
    ans = E.entry(TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID('urn:uuid:' + mi.uuid), UPDATED(mi.last_modified),
                  E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        # ans[-1] is the date element that was just appended
        ans[-1].text = mi.pubdate.isoformat()
    # extra is either the empty list or the converted content
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))
    get = partial(request_context.ctx.url_for, '/get', book_id=book_id, library_id=request_context.library_id)
    if mi.formats:
        fm = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.'+fmt)[0]
            # Formats without a known mime type get no link
            if mt:
                link = E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition")
                # format_metadata is looked up by upper case format name
                ffm = fm.get(fmt.upper())
                if ffm:
                    link.set('length', str(ffm['size']))
                    link.set('mtime', ffm['mtime'].isoformat())
                ans.append(link)
    ans.append(E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/cover"))
    ans.append(E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/thumbnail"))

    return ans
Exemplo n.º 16
0
def map_authors(authors, rules=()):
    """Apply the author mapping ``rules`` to every author.

    Without authors an empty list is returned; without rules a copy of
    ``authors`` is returned as a list. Otherwise the mapped names are
    returned with empty results dropped and duplicates removed.
    """
    if not authors:
        return []
    if not rules:
        return list(authors)
    mapped = []
    for author in authors:
        mapped += apply_rules(author, rules)
    return uniq([name for name in mapped if name])
Exemplo n.º 17
0
 def change_restriction(username):
     """Interactively change the library restrictions of ``username``.

     The current restrictions are printed first, then the user is asked to
     choose between allowing all libraries, allowing or blocking a list of
     named libraries, editing per-library search restrictions, or
     cancelling.

     :raises SystemExit: If the user does not exist.
     """
     restrictions = m.restrictions(username)
     if restrictions is None:
         raise SystemExit('The user {} does not exist'.format(username))

     # Show the current state before asking what to change
     if restrictions['allowed_library_names']:
         prints(
             _('{} is currently only allowed to access the libraries named: {}')
             .format(username, ', '.join(restrictions['allowed_library_names'])))
     if restrictions['blocked_library_names']:
         prints(
             _('{} is currently not allowed to access the libraries named: {}')
             .format(username, ', '.join(restrictions['blocked_library_names'])))
     if not restrictions['library_restrictions']:
         prints(_('{} has no additional per-library restrictions').format(username))
     else:
         prints(
             _('{} has the following additional per-library restrictions:')
             .format(username))
         for library_name, expression in iteritems(restrictions['library_restrictions']):
             prints(library_name + ':', expression)

     selection = choice(
         choices=[
             _('Allow access to all libraries'),
             _('Allow access to only specified libraries'),
             _('Allow access to all, except specified libraries'),
             _('Change per-library restrictions'),
             _('Cancel')])

     if selection == 4:
         # Cancel
         return
     if selection == 0:
         m.update_user_restrictions(username, {})
         return
     if selection == 3:
         while True:
             library = get_input(_('Enter the name of the library:'))
             if not library:
                 break
             prints(
                 _(
                     'Enter a search expression, access will be granted only to books matching this expression.'
                     ' An empty expression will grant access to all books.'))
             expression = get_input(_('Search expression:'))
             if not expression:
                 restrictions['library_restrictions'].pop(library, None)
             else:
                 restrictions['library_restrictions'][library] = expression
             m.update_user_restrictions(username, restrictions)
             if get_input(_('Another restriction?') + ' (y/n):') != 'y':
                 break
         return

     # Choices 1 and 2: a list of library names to allow or to block
     raw_names = get_input(_('Enter a comma separated list of library names:'))
     names = [name for name in (x.strip() for x in raw_names.split(',')) if name]
     if selection == 1:
         key = 'allowed_library_names'
         template = _('Allowing access only to libraries: {}')
     else:
         key = 'blocked_library_names'
         template = _(
             'Allowing access to all libraries, except: {}')
     prints(template.format(', '.join(names)))
     m.update_user_restrictions(username, {key: names})
0
def map_tags(tags, rules=()):
    """Apply the tag mapping ``rules`` to every tag.

    Without tags an empty list is returned; without rules a copy of
    ``tags`` is returned as a list. Otherwise every rule is paired with
    its matcher and the mapped tags are returned with empty results
    dropped and duplicates removed.
    """
    if not tags:
        return []
    if not rules:
        return list(tags)
    compiled = [(rule, matcher(rule)) for rule in rules]
    mapped = []
    for tag in tags:
        mapped += apply_rules(tag, compiled)
    return uniq([tag for tag in mapped if tag])
Exemplo n.º 19
0
 def completer(text, num):
     """Return completion number ``num`` for ``text``, or None when done.

     The completions are requested from the REPL on the first call and
     cached on ``self.completions``. The cache is cleared again once
     ``num`` runs past the available completions.
     """
     if self.completions is None:
         self.to_repl.put(('complete', text))
         self.completions = [entry for entry in self.get_from_repl() if entry]
         if not self.completions:
             return None
     try:
         candidate = self.completions[num]
     except (IndexError, TypeError, AttributeError, KeyError):
         # Exhausted (or unusable) completions: reset for the next request
         self.completions = None
         return None
     return candidate
Exemplo n.º 20
0
def custom_dictionaries(reread=False):
    """Return the frozenset of dictionaries found in the config directory.

    Every ``dictionaries/*/locales`` file is read as UTF-8; its first
    non-empty line is the dictionary name, the second the primary locale
    and the second and following lines are all the supported locales.
    Files with fewer than two non-empty lines and dictionaries whose
    primary locale has no country code are skipped.

    :param reread: Force the files to be read again instead of returning
        the cached result.
    """
    global _custom
    if _custom is None or reread:
        dics = []
        for lc in glob.glob(os.path.join(config_dir, 'dictionaries', '*/locales')):
            # Close the file promptly instead of leaking the handle
            with open(lc, 'rb') as f:
                locales = list(filter(None, f.read().decode('utf-8').splitlines()))
            try:
                name, locale, locales = locales[0], locales[1], locales[1:]
            except IndexError:
                continue
            base = os.path.dirname(lc)
            ploc = parse_lang_code(locale)
            if ploc.countrycode is None:
                continue
            dics.append(Dictionary(
                ploc, frozenset(filter(lambda x:x.countrycode is not None, map(parse_lang_code, locales))), os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale), False, name, os.path.basename(base)))
        _custom = frozenset(dics)
    return _custom
Exemplo n.º 21
0
def normalize_languages(opf_languages, mi_languages):
    """Preserve original country codes and use 2-letter lang codes where possible.

    Every parseable code in ``mi_languages`` is returned with its language
    part converted to ISO 639-1 when such a code exists. A code without a
    country code takes the one used for the same language in
    ``opf_languages``, if any. Unparseable codes are dropped.
    """
    def parsed(codes):
        # Yield the parsed form of every code that can be parsed
        for code in codes:
            try:
                lang = parse_lang_code(code)
            except ValueError:
                continue
            if lang:
                yield lang

    country_for = {}
    for lang in parsed(opf_languages):
        country_for[lang.langcode] = lang.countrycode

    normalized = []
    for lang in parsed(mi_languages):
        code = lang.langcode
        country = lang.countrycode or country_for.get(code, None)
        code = lang_as_iso639_1(code) or code
        if country:
            code += '-' + country
        normalized.append(code)
    return normalized
Exemplo n.º 22
0
 def line_numbers(self):
     """Return the sorted, distinct line numbers reported by the page.

     The values come from evaluating
     ``window.calibre_preview_integration.line_numbers()`` in the main
     frame; entries that cannot be converted to an integer are ignored.
     The result is cached on ``self._line_numbers``.
     """
     if self._line_numbers is not None:
         return self._line_numbers
     raw_values = self.mainFrame().evaluateJavaScript('window.calibre_preview_integration.line_numbers()')
     numbers = []
     for raw in raw_values:
         try:
             number = int(raw)
         except (TypeError, ValueError):
             continue
         numbers.append(number)
     self._line_numbers = sorted(uniq(numbers))
     return self._line_numbers
Exemplo n.º 23
0
def set_tags(root, prefixes, refines, val):
    """Replace the ``dc:subject`` elements in the metadata with ``val``.

    All existing subjects are removed first. Each distinct, non-empty tag
    in ``val`` is then added as a new subject with its whitespace
    normalized; tags that are empty after normalization are skipped.
    """
    for subject in XPath('./opf:metadata/dc:subject')(root):
        remove_element(subject, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    if not val:
        return
    for tag in uniq([entry for entry in val if entry]):
        element = metadata.makeelement(DC('subject'))
        element.text = normalize_whitespace(tag)
        if element.text:
            metadata.append(element)
Exemplo n.º 24
0
def contractions(col=None):
    """Return the frozenset of non-empty contractions of a collator.

    :param col: The collator to query. When falsy the module level
        ``_collator`` is used, which is created via ``collator()`` if it
        is still None.

    The result is cached per collator in ``_cmap``.
    """
    global _cmap
    col = col or _collator
    if col is None:
        col = collator()
    # The cache is keyed by the collator instance, so it has to be looked up
    # with the same key it is stored under below
    ans = _cmap.get(col, None)
    if ans is None:
        ans = col.contractions()
        ans = frozenset(filter(None, ans))
        _cmap[col] = ans
    return ans
Exemplo n.º 25
0
def builtin_dictionaries():
    """Return the frozenset of dictionaries found in the resources.

    Every ``*/locales`` file below the ``dictionaries`` resource directory
    is read as UTF-8; its first non-empty line is the primary locale and
    all non-empty lines together are the supported locales. The result is
    cached in ``_builtins``.
    """
    global _builtins
    if _builtins is None:
        dics = []
        for lc in glob.glob(os.path.join(P('dictionaries', allow_user_override=False), '*/locales')):
            # Close the file promptly instead of leaking the handle
            with open(lc, 'rb') as f:
                locales = list(filter(None, f.read().decode('utf-8').splitlines()))
            locale = locales[0]
            base = os.path.dirname(lc)
            dics.append(Dictionary(
                parse_lang_code(locale), frozenset(map(parse_lang_code, locales)), os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale), True, None, None))
        _builtins = frozenset(dics)
    return _builtins
Exemplo n.º 26
0
def remove_property_value(prop, predicate):
    ''' Remove the values matching ``predicate`` from the property
    ``prop``. When every value of the property matches, the property
    itself is removed from its parent, which means the property must have
    a parent (a CSSStyleDeclaration).

    :return: True if at least one value matched the predicate. '''
    matched = tuple(filter(predicate, prop.propertyValue))
    if len(prop.propertyValue) != len(matched):
        # Cut the text of every matching value out of the serialized value
        remaining = css_text(prop.propertyValue)
        for value in matched:
            remaining = remaining.replace(css_text(value), '').strip()
        prop.propertyValue.cssText = remaining
    else:
        prop.parent.removeProperty(prop.name)
    return len(matched) > 0
Exemplo n.º 27
0
    def run(self, archive):
        """Extract the single ebook contained in ``archive``, if any.

        Files that are not considered content (``thumbs.db``, files without
        an extension, ``.diz``/``.nfo`` files and anything inside a
        ``__MACOSX`` directory) are ignored. If the remaining files form a
        comic, the archive is copied to a temporary ``.cbr``/``.cbz`` file.
        If exactly one file with a known ebook extension remains, it is
        extracted to a temporary file.

        :param archive: Path to a ZIP or RAR (``.rar``) archive.
        :return: Path of the temporary file that was created, or ``archive``
            itself when nothing was extracted.
        """
        from calibre.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.utils.unrar import extract_member, names
        else:
            zf = ZipFile(archive, 'r')

        def fname_ok(fname):
            bn = os.path.basename(fname).lower()
            if bn == 'thumbs.db':
                return False
            if '.' not in bn:
                return False
            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
                return False
            if '__MACOSX' in fname.split('/'):
                return False
            return True

        try:
            if is_rar:
                fnames = list(names(archive))
            else:
                fnames = zf.namelist()

            fnames = list(filter(fname_ok, fnames))
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                with open(archive, 'rb') as f:
                    of.write(f.read())
                of.close()
                return of.name
            if len(fnames) > 1 or not fnames:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in {
                    'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                    'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The zip file was previously left open on every return path
            if zf is not None:
                zf.close()
Exemplo n.º 28
0
def load_icon_resource(icon_resource, as_data=False, size=ICON_SIZE):
    """Load the icon described by a ``module,index`` resource string.

    Returns None when ``icon_resource`` is empty or does not consist of a
    module followed by a comma and a trailing integer index. A module
    wrapped in double quotes is unquoted first. If ``load_icon`` fails,
    ``simple_load_icon`` is used instead.
    """
    if not icon_resource:
        return
    pieces = [piece for piece in re.split(r',([-0-9]+$)', icon_resource) if piece]
    if len(pieces) != 2:
        return
    module, index = pieces[0], int(pieces[1])
    is_quoted = module.startswith('"') and module.endswith('"')
    if is_quoted:
        module = split_commandline(module)[0]
    try:
        icon = load_icon(module, index, as_data=as_data, size=size)
    except Exception:
        icon = simple_load_icon(module, index, as_data=as_data, size=size)
    return icon
Exemplo n.º 29
0
 def __sub_reg_split(self, input_file):
     """Apply the basic substitutions to ``input_file`` and split it.

     :return: List of tokens, without empty tokens and bare newlines.
     """
     text = self.__replace_spchar.mreplace(input_file)
     substitutions = (
         # this is for older RTF
         (self.__par_exp, r'\n\\par \n'),
         (self.__cwdigit_exp, r"\g<1>\n\g<2>"),
         (self.__cs_ast, r"\g<1>"),
         (self.__ms_hex_exp, r"\\mshex0\g<1> "),
         (self.__utf_ud, r"\\{\\uc0 \g<1>\\}"),
     )
     for pattern, replacement in substitutions:
         text = pattern.sub(replacement, text)
     # remove \n in bin data
     text = self.__bin_exp.sub(
         lambda match: match.group().replace('\n', '') + '\n', text)
     # split, then remove empty tokens and \n
     tokens = re.split(self.__splitexp, text)
     return [token for token in tokens if len(token) > 0 and token != '\n']
Exemplo n.º 30
0
def get_plugin_updates_available(raise_error=False):
    '''
    API exposed to read whether there are updates available for any
    of the installed user plugins.

    Returns None if there are no external plugins or no updates were found.
    Returns list(DisplayPlugin) of plugins installed that have a new version
    '''
    if not has_external_plugins():
        return None
    display_plugins = read_available_plugins(raise_error=raise_error)
    if not display_plugins:
        return None
    upgradeable = [
        plugin for plugin in display_plugins
        if filter_upgradeable_plugins(plugin)]
    if not upgradeable:
        return None
    return upgradeable
Exemplo n.º 31
0
def polish_index_markup(index, blocks):
    """Mark up the index blocks and fold sub-entries into their main entry.

    Every block is given the ``index-entry`` class. The text of the first
    descendant link is split on ``:`` to obtain the entry path; when
    there is no ``:`` the link is wrapped in a block level span instead.
    Consecutive blocks with the same first path component are merged.
    """
    # Blocks are in reverse order at this point
    paths = {}
    span_levels = {}
    for block in blocks:
        css_class = block.get('class', '') or ''
        block.set('class', (css_class + ' index-entry').lstrip())
        links = block.xpath('descendant::a[1]')
        text = ''
        if links:
            text = etree.tostring(
                links[0], method='text', with_tail=False,
                encoding='unicode').strip()
        if ':' not in text:
            # try using a span all the time
            paths[block] = [text]
            # NOTE(review): a block without any <a> raises IndexError here
            link = links[0]
            container = link.getparent()
            wrapper = container.makeelement(
                'span', style='display:block; margin-left: 0em')
            container.append(wrapper)
            wrapper.append(link)
            span_levels[wrapper] = 0
        else:
            stripped = (component.strip() for component in text.split(':'))
            parts = [component for component in stripped if component]
            paths[block] = parts
            if len(parts) > 1:
                split_up_block(block, links[0], text, parts, span_levels)

        for line_break in block.xpath('descendant::br'):
            line_break.tail = None

    # We want a single block for each main entry
    current = blocks[0]
    for block in blocks[1:]:
        current_path, block_path = paths[current], paths[block]
        if current_path[0] != block_path[0]:
            current = block
        else:
            merge_blocks(current, block, 0, 0, block_path, span_levels)
Exemplo n.º 32
0
    def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
            preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
        '''
        Find a font on the system capable of rendering the given text.

        Returns a font family (as given by fonts_for_family()) that has a
        "normal" font and that can render the supplied text. If no such font
        exists, returns None.

        The first family whose generic family is in ``allowed_families`` (or
        is the most preferred one) is returned immediately. Otherwise the
        first family found for each generic family is remembered and the one
        that comes first in ``preferred_families`` is returned.

        :param text: The text to render, must be unicode
        :param allowed_families: Generic CSS families accepted right away.
            This default is shared between calls and must not be mutated.
        :param preferred_families: Generic CSS families in order of preference
        :raises TypeError: If ``text`` is not unicode
        :return: (family name, faces) or None, None
        '''
        from calibre.utils.fonts.utils import (supports_text,
                panose_to_css_generic_family, get_printable_characters)
        if not isinstance(text, unicode_type):
            raise TypeError(u'%r is not unicode'%text)
        text = get_printable_characters(text)
        found = {}

        def filter_faces(font):
            # A font whose data cannot be read or parsed is treated as not
            # supporting the text. Only ordinary errors are swallowed, so
            # KeyboardInterrupt and SystemExit still propagate.
            try:
                raw = self.get_font_data(font)
                return supports_text(raw, text)
            except Exception:
                pass
            return False

        for family in self.find_font_families():
            faces = list(filter(filter_faces, self.fonts_for_family(family)))
            if not faces:
                continue
            generic_family = panose_to_css_generic_family(faces[0]['panose'])
            if generic_family in allowed_families or generic_family == preferred_families[0]:
                return (family, faces)
            elif generic_family not in found:
                found[generic_family] = (family, faces)

        for f in preferred_families:
            if f in found:
                return found[f]
        return None, None
Exemplo n.º 33
0
def get_device_languages(hub_handle, device_port, buf=None):
    ' Get the languages supported by the device for strings '
    buf = alloc_descriptor_buf(buf)

    def prepare_request(raw):
        # Fill in the descriptor request: ask the device connected at
        # device_port for a string descriptor.
        request = cast(raw, PUSB_DESCRIPTOR_REQUEST).contents
        request.ConnectionIndex = device_port
        setup = request.SetupPacket
        setup.bmRequest = 0x80
        setup.bRequest = USB_REQUEST_GET_DESCRIPTOR
        setup.wValue[1] = USB_STRING_DESCRIPTOR_TYPE
        setup.wLength = MAXIMUM_USB_STRING_LENGTH + 2

    buf, _nbytes = device_io_control(
        hub_handle, IOCTL_USB_GET_DESCRIPTOR_FROM_NODE_CONNECTION, buf, buf,
        prepare_request)
    descriptor = cast(buf, PUSB_DESCRIPTOR_REQUEST).contents.Data
    length, dtype = descriptor.bLength, descriptor.bType
    if dtype != 0x03:
        raise WindowsError('Invalid datatype for string descriptor: 0x%x' %
                           dtype)
    # Reinterpret the payload as an array of 16 bit values and drop the
    # zero entries.
    # NOTE(review): the array length is derived from bLength, which
    # presumably also counts the descriptor header -- confirm that reading
    # length // 2 entries is intended.
    lang_ids = cast(descriptor.String, POINTER(USHORT * (length // 2)))
    return buf, [lang_id for lang_id in lang_ids.contents if lang_id]
Exemplo n.º 34
0
 def __init__(
     self,
     items,
     level1=DEFAULT_LEVEL1,
     level2=DEFAULT_LEVEL2,
     level3=DEFAULT_LEVEL3,
     scorer=None
 ):
     # The worker list is populated on first use, while holding wlock:
     # one started Worker per CPU (at least one), all sharing the same
     # pair of request/result queues.
     with wlock:
         if not workers:
             requests, results = Queue(), Queue()
             pool = [Worker(requests, results) for n in range(max(1, cpu_count()))]
             for worker in pool:
                 worker.start()
             workers.extend(pool)
     # Falsy items are dropped, the rest are converted to NFC normalized
     # unicode
     self.items = items = tuple(
         normalize('NFC', unicode_type(item)) for item in items if item)
     # One task per worker. Each task entry is indexed below as
     # (first element, second element): the first is stored in task_maps
     # keyed by position inside the task, the second is handed to the
     # scorer.
     tasks = split(items, len(workers))
     self.task_maps = [
         {position: first for position, (first, _) in enumerate(task)}
         for task in tasks
     ]
     make_scorer = scorer or default_scorer
     self.scorers = [
         make_scorer(tuple(entry[1] for entry in task)) for task in tasks
     ]
     self.sort_keys = None
Exemplo n.º 35
0
    def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
        'List of possible overrides for the given para'

        def in_odd_band(index, band_size):
            # Zero based number of the band that contains index; odd
            # numbered bands map to the band1* overrides, even ones to
            # band2*.
            band = (index - (index % band_size)) // band_size
            return band % 2 == 1

        has_column = c is not None
        names = ['wholeTable']

        if has_column:
            names.append(
                'band1Vert' if in_odd_band(c, self.table_style.col_band_size)
                else 'band2Vert')
        names.append(
            'band1Horz' if in_odd_band(r, self.table_style.row_band_size)
            else 'band2Horz')

        # According to the OOXML spec columns should have higher override
        # priority than rows, but Word seems to do it the other way around.
        if has_column:
            last_col = num_of_cols_in_row - 1
            if c == 0:
                names.append('firstCol')
            if c >= last_col:
                names.append('lastCol')

        last_row = num_of_rows - 1
        if r == 0:
            names.append('firstRow')
        if r >= last_row:
            names.append('lastRow')

        if has_column:
            # Corner cells, in the order they must appear in the result
            corners = (
                (r == 0, c == 0, 'nwCell'),
                (r == 0, c == last_col, 'neCell'),
                (r == last_row, c == 0, 'swCell'),
                (r == last_row, c == last_col, 'seCell'),
            )
            names.extend(
                name for on_row, on_col, name in corners if on_row and on_col)

        return tuple(name for name in names if self.override_allowed(name))
Exemplo n.º 36
0
def load_icon_resource_as_pixmap(icon_resource, size=ICON_SIZE):
    '''
    Load the icon described by icon_resource as a pixmap.

    icon_resource is split at a trailing ``,<index>`` into a module and an
    integer index; the module may be wrapped in double quotes. Of the icons
    loaded from the module, the smallest one whose area is at least
    size * size is used (scaled to size unless its area matches exactly);
    if none is that large the largest one is scaled instead.

    Returns None when icon_resource is empty, does not have the
    ``module,index`` form, or yields no usable pixmap.
    '''
    if not icon_resource:
        return None
    pieces = [piece for piece in re.split(r',([-0-9]+$)', icon_resource) if piece]
    if len(pieces) != 2:
        return None
    module, index = pieces[0], int(pieces[1])
    if module.startswith('"') and module.endswith('"'):
        module = split_commandline(module)[0]
    flags = winutil.LOAD_LIBRARY_AS_DATAFILE | winutil.LOAD_LIBRARY_AS_IMAGE_RESOURCE
    hmodule = winutil.load_library(module, flags)
    icons = winutil.load_icons(hmodule, index)
    must_use_qt()

    candidates = []
    for icon_data, icon_handle in icons:
        candidate = QPixmap()
        candidate.loadFromData(icon_data)
        if candidate.isNull() and bool(icon_handle):
            # The raw data could not be decoded, fall back to the handle
            candidate = hicon_to_pixmap(icon_handle)
        if not candidate.isNull():
            candidates.append(candidate)
    if not candidates:
        return None

    def area(p):
        return p.width() * p.height()

    candidates.sort(key=area)
    wanted = size * size
    # Smallest pixmap that is big enough, otherwise the biggest available
    best = next((p for p in candidates if area(p) >= wanted), candidates[-1])
    if area(best) == wanted:
        return best
    return best.scaled(size, size, aspectRatioMode=Qt.KeepAspectRatio, transformMode=Qt.SmoothTransformation)
Exemplo n.º 37
0
def find_identical_books(mi, data):
    '''
    Return the set of book ids in ``data`` that look identical to ``mi``.

    ``data`` is a 4-tuple ``(author_map, aid_map, title_map, lang_map)``:
    author_map maps icu_lower()ed author names to author ids, aid_map maps
    an author id to book ids, title_map maps a book id to its title and
    lang_map maps a book id to its languages.

    A book matches when it is by every author of ``mi``, its fuzzy_title()
    equals that of ``mi.title`` and, if ``mi`` has any language other than
    ``und``, the book either has no languages recorded or has exactly the
    same ones. An empty set is returned when ``mi`` has no authors or when
    any author is unknown.
    '''
    author_map, aid_map, title_map, lang_map = data
    found_books = None
    for a in mi.authors or ():
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            return set()
        books_by_author = {
            book_id
            for aid in author_ids for book_id in aid_map.get(aid, ())
        }
        if found_books is None:
            found_books = books_by_author
        else:
            found_books &= books_by_author
        if not found_books:
            return set()

    if found_books is None:
        # mi has no authors at all, so there is nothing to match against.
        # Without this guard the loop below would try to iterate None.
        return set()

    titleq = fuzzy_title(mi.title)
    ans = {
        book_id for book_id in found_books
        if fuzzy_title(title_map.get(book_id, '')) == titleq
    }

    langq = tuple(
        filter(lambda x: x and x != 'und',
               map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        # Books without language information match any language
        book_langq = lang_map.get(book_id)
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}
Exemplo n.º 38
0
def sony_metadata(oeb):
    '''
    Build the Sony periodical metadata for ``oeb``.

    Returns a ``(metadata, atom)`` pair. ``metadata`` is SONY_METADATA
    filled in with the title, short title, publisher, issue date and
    language. ``atom`` is SONY_ATOM, encoded as UTF-8, containing one
    SONY_ATOM_SECTION for every TOC section that has an href followed by
    one SONY_ATOM_ENTRY for each of its articles that has an href.

    Missing or unusable metadata is replaced by defaults: the full title
    for the short title, the current date for the issue date, ``en`` for
    the language and a freshly generated uuid for the base id.
    '''
    m = oeb.metadata
    title = short_title = unicode_type(m.title[0])
    publisher = __appname__ + ' ' + __version__
    # The fallbacks below are deliberately best effort, but only Exception
    # is caught so that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        pt = unicode_type(oeb.metadata.publication_type[0])
        short_title = ':'.join(pt.split(':')[2:])
    except Exception:
        pass

    try:
        date = parse_date(unicode_type(m.date[0]),
                as_utc=False).strftime('%Y-%m-%d')
    except Exception:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode_type(m.language[0]).replace('_', '-')
    except Exception:
        language = 'en'
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))

    updated = strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    def cal_id(x):
        # True for identifiers that carry a *scheme attribute set to uuid
        return any(
            k.endswith('scheme') and v == 'uuid'
            for k, v in x.attrib.items())

    try:
        base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
    except Exception:
        # No uuid identifier available
        base_id = unicode_type(uuid4())

    toc = oeb.toc

    if False and toc.depth() < 3:
        # Single section periodical
        # Disabled since I prefer the current behavior
        from calibre.ebooks.oeb.base import TOC
        section = TOC(klass='section', title=_('All articles'),
                    href=oeb.spine[2].href)
        for x in toc:
            section.nodes.append(x)
        toc = TOC(klass='periodical', href=oeb.spine[2].href,
                    title=unicode_type(oeb.metadata.title[0]))
        toc.nodes.append(section)

    entries = []
    seen_titles = set()
    for i, section in enumerate(toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        sectitle = section.title
        if not sectitle:
            sectitle = _('Unknown')
        # Make the section title unique by appending a counter
        d = 1
        bsectitle = sectitle
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + unicode_type(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = section.description
        if not secdesc:
            secdesc = ''
        secdesc = xml(secdesc)
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=section.href, id=xml(base_id)+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            atitle = article.title
            btitle = atitle
            d = 1
            # NOTE(review): article titles are checked against seen_titles
            # but never added to it, so two articles with the same title
            # are not told apart -- confirm whether that is intended.
            while atitle in seen_titles:
                atitle = btitle + ' ' + unicode_type(d)
                d += 1

            auth = article.author if article.author else ''
            # NOTE(review): this uses the description of the section, not
            # of the article, and unlike the section entry it is not passed
            # through xml() -- confirm against SONY_ATOM_ENTRY.
            desc = section.description
            if not desc:
                desc = ''
            aid = 'article%d'%j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=article.href,
                word_count=unicode_type(1),
                id=xml(base_id)+'/'+secid+'/'+aid
            ))

    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=xml(base_id)).encode('utf-8')

    return metadata, atom
Exemplo n.º 39
0
def build_index(rd, books, num, search, sort, order, start, total, url_base,
                field_metadata, ctx, library_map, library_id):  # {{{
    '''
    Build the HTML tree for the mobile book listing page.

    The page body consists of the logo, the search box, a navigation bar,
    a table with one row per entry of ``books`` (thumbnail plus download
    buttons and two lines of details), a second navigation bar, the
    library chooser when ``library_map`` is not empty, and a link to the
    full interface. Returns the root ``html`` element.
    '''
    logo = E.div(
        E.img(src=ctx.url_for('/static', what='calibre.png'), alt=__appname__),
        id='logo')
    search_box = build_search_box(num, search, sort, order, ctx,
                                  field_metadata, library_id)
    top_navigation = build_navigation(start, num, total, url_base)
    bottom_navigation = build_navigation(start, num, total, url_base)
    library_chooser = build_choose_library(ctx, library_map) if library_map else None
    books_table = E.table(id='listing')

    body = E.body(logo, search_box, top_navigation, E.hr(class_='spacer'),
                  books_table, E.hr(class_='spacer'), bottom_navigation)

    for book in books:
        thumb_url = ctx.url_for('/get', what='thumb', book_id=book.id,
                                library_id=library_id)
        cover_cell = E.td(
            E.img(type='image/jpeg', border='0', src=thumb_url,
                  class_='thumbnail'))

        # One download button per format, skipping ORIGINAL_* copies
        info_cell = E.td()
        for fmt in book.formats or ():
            if not fmt:
                continue
            label = fmt.lower()
            if label.startswith('original_'):
                continue
            download_url = ctx.url_for(
                '/legacy/get', what=fmt, book_id=book.id,
                library_id=library_id,
                filename=book_filename(rd, book.id, book, fmt))
            button = E.span(E.a(label, href=download_url), class_='button')
            button.tail = u''
            info_cell.append(button)

        details = E.div(class_='data-container')
        info_cell.append(details)

        if book.series:
            series = '[%s - %s]' % (book.series, book.series_index)
        else:
            series = ''
        if book.tags:
            tags = 'Tags=[%s]' % ', '.join(book.tags)
        else:
            tags = ''

        # Displayable ignorable fields, except comments, that have a value
        field_bits = []
        for key in field_metadata.ignorable_field_keys():
            if not ctx.is_field_displayable(key):
                continue
            if field_metadata[key]['datatype'] == 'comments':
                continue
            name, val = book.format_field(key)
            if val:
                field_bits.append('%s=[%s] ' % (name, val))
        ctext = ''.join(field_bits)

        first_line = '%s %s by %s' % (
            book.title, series, authors_to_string(book.authors))
        details.append(E.span(first_line, class_='first-line'))

        if is_date_undefined(book.timestamp):
            ds = ''
        else:
            ds = strftime('%d %b, %Y',
                          t=dt_as_local(book.timestamp).timetuple())
        details.append(
            E.span('%s %s %s' % (ds, tags, ctext), class_='second-line'))

        books_table.append(E.tr(cover_cell, info_cell))

    if library_map:
        body.append(library_chooser)

    full_interface_link = E.a(
        _('Switch to the full interface (non-mobile interface)'),
        href=ctx.url_for(None),
        style="text-decoration: none; color: blue",
        title=_('The full interface gives you many more features, '
                'but it may not work well on a small screen'))
    body.append(E.div(full_interface_link, style="text-align:center"))

    head = E.head(
        E.title(__appname__ + ' Library'),
        E.link(rel='icon', href=ctx.url_for('/favicon.png'),
               type='image/png'),
        E.link(rel='stylesheet', type='text/css',
               href=ctx.url_for('/static', what='mobile.css')),
        E.link(rel='apple-touch-icon',
               href=ctx.url_for("/static", what='calibre.png')),
        E.meta(name="robots", content="noindex"))
    return E.html(head, body)
Exemplo n.º 40
0
    def workaround_ade_quirks(self):  # {{{
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.

        Works in place on self.oeb: fragment identifiers that ADE cannot
        handle are stripped from the TOC, the markup of every spine item is
        cleaned up and, when there is a main stylesheet, some of its rules
        are adjusted.
        '''
        from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

        stylesheet = self.oeb.manifest.main_stylesheet

        # ADE cries big wet tears when it encounters an invalid fragment
        # identifier in the NCX toc.
        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
        for node in self.oeb.toc.iter():
            href = getattr(node, 'href', None)
            if hasattr(href, 'partition'):
                # Slice out the separator instead of unpacking it into _,
                # so that the name _ is not rebound inside this method
                base, frag = href.partition('#')[::2]
                frag = urlunquote(frag)
                if frag and frag_pat.match(frag) is None:
                    self.log.warn(
                        'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'
                        % frag)
                    node.href = base

        # Same pattern for every spine item, so compile it only once
        special_chars = re.compile('[\u200b\u00ad]')

        for item in self.oeb.spine:
            root = item.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]

            if hasattr(body, 'xpath'):
                # remove <img> tags with empty src elements
                bad = []
                for img in XPath('//h:img')(body):
                    src = img.get('src', '').strip()
                    if src in ('', '#') or src.startswith('http:'):
                        bad.append(img)
                for img in bad:
                    img.getparent().remove(img)

                # Add id attribute to <a> tags that have name
                for anchor in XPath('//h:a[@name]')(body):
                    if not anchor.get('id', False):
                        anchor.set('id', anchor.get('name'))
                    # The delightful epubcheck has started complaining about <a> tags that
                    # have name attributes.
                    anchor.attrib.pop('name')

                # Replace <br> that are children of <body> as ADE doesn't handle them
                for br in XPath('./h:br')(body):
                    if br.getparent() is None:
                        continue
                    try:
                        prior = next(br.itersiblings(preceding=True))
                        priortag = barename(prior.tag)
                        priortext = prior.tail
                    except Exception:
                        # No preceding sibling, or one whose tag cannot be
                        # handled by barename(): treat the <br> as if it
                        # directly followed the start of <body>
                        priortag = 'body'
                        priortext = body.text
                    if priortext:
                        priortext = priortext.strip()
                    br.tag = XHTML('p')
                    br.text = '\u00a0'
                    declarations = (
                        part.strip() for part in br.get('style', '').split(';'))
                    br_style = [decl for decl in declarations if decl]
                    br_style.append('margin:0pt; border:0pt')
                    # If the prior tag is a block (including a <br> we replaced)
                    # then this <br> replacement should have a 1-line height.
                    # Otherwise it should have no height.
                    if not priortext and priortag in block_level_tags:
                        br_style.append('height:1em')
                    else:
                        br_style.append('height:0pt')
                    br.set('style', '; '.join(br_style))

            for tag in XPath('//h:embed')(root):
                tag.getparent().remove(tag)
            for tag in XPath('//h:object')(root):
                # SVG objects are kept
                if tag.get('type', '').lower().strip() in {
                        'image/svg+xml', 'application/svg+xml'
                }:
                    continue
                tag.getparent().remove(tag)

            # Drop empty <title>/<style> and scripts that ADE has no use for
            for tag in XPath('//h:title|//h:style')(root):
                if not tag.text:
                    tag.getparent().remove(tag)
            for tag in XPath('//h:script')(root):
                if (not tag.text and not tag.get('src', False)
                        and tag.get('type', None) != 'text/x-mathjax-config'):
                    tag.getparent().remove(tag)
            for tag in XPath('//h:body/descendant::h:script')(root):
                tag.getparent().remove(tag)

            formchildren = XPath('./h:input|./h:button|./h:textarea|'
                                 './h:label|./h:fieldset|./h:legend')
            for tag in XPath('//h:form')(root):
                if formchildren(tag):
                    tag.getparent().remove(tag)
                else:
                    # Not a real form
                    tag.tag = XHTML('div')

            for tag in XPath('//h:center')(root):
                tag.tag = XHTML('div')
                tag.set('style', 'text-align:center')
            # ADE can't handle &amp; in an img url
            for tag in XPath('//h:img[@src]')(root):
                tag.set('src', tag.get('src', '').replace('&', ''))

            # ADE whimpers in fright when it encounters a <td> outside a
            # <table>
            in_table = XPath('ancestor::h:table')
            for tag in XPath('//h:td|//h:tr|//h:th')(root):
                if not in_table(tag):
                    tag.tag = XHTML('div')

            # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces
            for elem in root.iterdescendants('*'):
                if elem.text:
                    elem.text = special_chars.sub('', elem.text)
                    elem.text = elem.text.replace('\u2011', '-')
                if elem.tail:
                    elem.tail = special_chars.sub('', elem.tail)
                    elem.tail = elem.tail.replace('\u2011', '-')

            if stylesheet is not None:
                # ADE doesn't render lists correctly if they have left margins
                from css_parser.css import CSSRule
                for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
                    sel = '.' + lb.get('class')
                    for rule in stylesheet.data.cssRules.rulesOfType(
                            CSSRule.STYLE_RULE):
                        if sel == rule.selectorList.selectorText:
                            rule.style.removeProperty('margin-left')
                            # padding-left breaks rendering in webkit and gecko
                            rule.style.removeProperty('padding-left')
                # Change whitespace:pre to pre-wrap to accommodate readers that
                # cannot scroll horizontally
                for rule in stylesheet.data.cssRules.rulesOfType(
                        CSSRule.STYLE_RULE):
                    style = rule.style
                    ws = style.getPropertyValue('white-space')
                    if ws == 'pre':
                        style.setProperty('white-space', 'pre-wrap')
Exemplo n.º 41
0
 def _finished(self, *args):
     if self.model:
         update_plugins = list(
             filter(filter_upgradeable_plugins, self.model.display_plugins))
         self.gui.recalc_update_label(len(update_plugins))
Exemplo n.º 42
0
 def edit_tags(self):
     # Let the user edit the comma separated tags of the query field in a
     # TagEditor dialog and write the result back if it was accepted.
     from calibre.gui2.dialogs.tag_editor import TagEditor
     db = get_gui().current_db
     stripped = (part.strip() for part in self.query.text().split(','))
     current = [tag for tag in stripped if tag]
     dialog = TagEditor(self, db, current_tags=current)
     if dialog.exec_() == dialog.Accepted:
         self.query.setText(', '.join(dialog.tags))
Exemplo n.º 43
0
def py_clean_xml_chars(unicode_string):
    # Keep only the characters for which allowed() is true
    return u''.join(ch for ch in unicode_string if allowed(ch))
Exemplo n.º 44
0
 def change_restriction(username):
     # Show the current library restrictions of username, then ask
     # interactively how they should be changed and store the result.
     restrictions = m.restrictions(username)
     if restrictions is None:
         raise SystemExit('The user {} does not exist'.format(username))

     allowed = restrictions['allowed_library_names']
     if allowed:
         message = ngettext(
             '{} is currently only allowed to access the library named: {}',
             '{} is currently only allowed to access the libraries named: {}',
             len(allowed))
         prints(message.format(username, ', '.join(allowed)))

     blocked = restrictions['blocked_library_names']
     if blocked:
         message = ngettext(
             '{} is currently not allowed to access the library named: {}',
             '{} is currently not allowed to access the libraries named: {}',
             len(blocked))
         prints(message.format(username, ', '.join(blocked)))

     if restrictions['library_restrictions']:
         prints(
             _('{} has the following additional per-library restrictions:').
             format(username))
         for name, expression in iteritems(restrictions['library_restrictions']):
             prints(name + ':', expression)
     else:
         prints(
             _('{} has no additional per-library restrictions').format(
                 username))

     c = choice(choices=[
         _('Allow access to all libraries'),
         _('Allow access to only specified libraries'),
         _('Allow access to all, except specified libraries'),
         _('Change per-library restrictions'),
         _('Cancel')
     ])

     if c == 4:
         # Cancel
         return

     if c == 0:
         m.update_user_restrictions(username, {})
         return

     if c == 3:
         # Edit per-library search restrictions until the user stops
         while True:
             library = get_input(_('Enter the name of the library:'))
             if not library:
                 break
             prints(
                 _('Enter a search expression, access will be granted only to books matching this expression.'
                   ' An empty expression will grant access to all books.'))
             expression = get_input(_('Search expression:'))
             if expression:
                 restrictions['library_restrictions'][library] = expression
             else:
                 # An empty expression removes the restriction
                 restrictions['library_restrictions'].pop(library, None)
             m.update_user_restrictions(username, restrictions)
             if get_input(_('Another restriction?') + ' (y/n):') != 'y':
                 break
         return

     # Remaining choices: a list of allowed (1) or blocked library names
     raw_names = get_input(
         _('Enter a comma separated list of library names:'))
     names = [
         name for name in (part.strip() for part in raw_names.split(','))
         if name
     ]
     if c == 1:
         key = 'allowed_library_names'
         template = _('Allowing access only to libraries: {}')
     else:
         key = 'blocked_library_names'
         template = _('Allowing access to all libraries, except: {}')
     prints(template.format(', '.join(names)))
     m.update_user_restrictions(username, {key: names})
Exemplo n.º 45
0
    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        from calibre.utils.date import isoformat
        from calibre.utils.html2text import html2text
        from calibre.utils.bibtex import BibTeX
        from calibre.library.save_to_disk import preprocess_template
        from calibre.utils.logging import default_log as log
        from calibre.utils.filenames import ascii_text

        library_name = os.path.basename(db.library_path)

        def create_bibtex_entry(entry,
                                fields,
                                mode,
                                template_citation,
                                bibtexdict,
                                db,
                                citation_bibtex=True,
                                calibre_files=True):

            # Bibtex doesn't like UTF-8 but keep unicode until writing
            # Define starting chain or if book valid strict and not book return a Fail string

            bibtex_entry = []
            if mode != "misc" and check_entry_book_valid(entry):
                bibtex_entry.append('@book{')
            elif mode != "book":
                bibtex_entry.append('@misc{')
            else:
                # case strict book
                return ''

            if citation_bibtex:
                # Citation tag
                bibtex_entry.append(
                    make_bibtex_citation(entry, template_citation, bibtexdict))
                bibtex_entry = [' '.join(bibtex_entry)]

            for field in fields:
                if field.startswith('#'):
                    item = db.get_field(entry['id'], field, index_is_id=True)
                    if isinstance(item, (bool, numbers.Number)):
                        item = repr(item)
                elif field == 'title_sort':
                    item = entry['sort']
                elif field == 'library_name':
                    item = library_name
                else:
                    item = entry[field]

                # check if the field should be included (none or empty)
                if item is None:
                    continue
                try:
                    if len(item) == 0:
                        continue
                except TypeError:
                    pass

                if field == 'authors':
                    bibtex_entry.append('author = "%s"' %
                                        bibtexdict.bibtex_author_format(item))

                elif field == 'id':
                    bibtex_entry.append('calibreid = "%s"' % int(item))

                elif field == 'rating':
                    bibtex_entry.append('rating = "%s"' % int(item))

                elif field == 'size':
                    bibtex_entry.append('%s = "%s octets"' %
                                        (field, int(item)))

                elif field == 'tags':
                    # A list to flatten
                    bibtex_entry.append(
                        'tags = "%s"' %
                        bibtexdict.utf8ToBibtex(', '.join(item)))

                elif field == 'comments':
                    # \n removal
                    item = item.replace('\r\n', ' ')
                    item = item.replace('\n', ' ')
                    # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
                    item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
                    # html to text
                    try:
                        item = html2text(item)
                    except:
                        log.warn("Failed to convert comments to text")
                    bibtex_entry.append('note = "%s"' %
                                        bibtexdict.utf8ToBibtex(item))

                elif field == 'isbn':
                    # Could be 9, 10 or 13 digits
                    bibtex_entry.append('isbn = "%s"' % format_isbn(item))

                elif field == 'formats':
                    # Add file path if format is selected
                    formats = [
                        format.rpartition('.')[2].lower() for format in item
                    ]
                    bibtex_entry.append('formats = "%s"' % ', '.join(formats))
                    if calibre_files:
                        files = [
                            ':%s:%s' %
                            (format, format.rpartition('.')[2].upper())
                            for format in item
                        ]
                        bibtex_entry.append('file = "%s"' % ', '.join(files))

                elif field == 'series_index':
                    bibtex_entry.append('volume = "%s"' % int(item))

                elif field == 'timestamp':
                    bibtex_entry.append('timestamp = "%s"' %
                                        isoformat(item).partition('T')[0])

                elif field == 'pubdate':
                    bibtex_entry.append('year = "%s"' % item.year)
                    bibtex_entry.append(
                        'month = "%s"' %
                        bibtexdict.utf8ToBibtex(strftime("%b", item)))

                elif field.startswith('#') and isinstance(
                        item, string_or_bytes):
                    bibtex_entry.append(
                        'custom_%s = "%s"' %
                        (field[1:], bibtexdict.utf8ToBibtex(item)))

                elif isinstance(item, string_or_bytes):
                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                    # 'author_sort', 'series', 'title_sort'] :
                    bibtex_entry.append('%s = "%s"' %
                                        (field, bibtexdict.utf8ToBibtex(item)))

            bibtex_entry = ',\n    '.join(bibtex_entry)
            bibtex_entry += ' }\n\n'

            return bibtex_entry

        def check_entry_book_valid(entry):
            # Check that the required fields are ok for a book entry
            for field in ['title', 'authors', 'publisher']:
                if entry[field] is None or len(entry[field]) == 0:
                    return False
            if entry['pubdate'] is None:
                return False
            else:
                return True

        def make_bibtex_citation(entry, template_citation, bibtexclass):

            # define a function to replace the template entry by its value
            def tpl_replace(objtplname):

                tpl_field = re.sub(r'[\{\}]', '', objtplname.group())

                if tpl_field in TEMPLATE_ALLOWED_FIELDS:
                    if tpl_field in ['pubdate', 'timestamp']:
                        tpl_field = isoformat(
                            entry[tpl_field]).partition('T')[0]
                    elif tpl_field in ['tags', 'authors']:
                        tpl_field = entry[tpl_field][0]
                    elif tpl_field in ['id', 'series_index']:
                        tpl_field = unicode_type(entry[tpl_field])
                    else:
                        tpl_field = entry[tpl_field]
                    return ascii_text(tpl_field)
                else:
                    return ''

            if len(template_citation) > 0:
                tpl_citation = bibtexclass.utf8ToBibtex(
                    bibtexclass.ValidateCitationKey(
                        re.sub(r'\{[^{}]*\}', tpl_replace, template_citation)))

                if len(tpl_citation) > 0:
                    return tpl_citation

            if len(entry["isbn"]) > 0:
                template_citation = '%s' % re.sub(r'[\D]', '', entry["isbn"])

            else:
                template_citation = '%s' % unicode_type(entry["id"])

            return bibtexclass.ValidateCitationKey(template_citation)

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification

        # Combobox options
        bibfile_enc = ['utf8', 'cp1252', 'ascii']
        bibfile_enctag = ['strict', 'replace', 'ignore', 'backslashreplace']
        bib_entry = ['mixed', 'misc', 'book']

        # Needed beacause CLI return str vs int by widget
        try:
            bibfile_enc = bibfile_enc[opts.bibfile_enc]
            bibfile_enctag = bibfile_enctag[opts.bibfile_enctag]
            bib_entry = bib_entry[opts.bib_entry]
        except:
            if opts.bibfile_enc in bibfile_enc:
                bibfile_enc = opts.bibfile_enc
            else:
                log.warn("Incorrect --choose-encoding flag, revert to default")
                bibfile_enc = bibfile_enc[0]
            if opts.bibfile_enctag in bibfile_enctag:
                bibfile_enctag = opts.bibfile_enctag
            else:
                log.warn(
                    "Incorrect --choose-encoding-configuration flag, revert to default"
                )
                bibfile_enctag = bibfile_enctag[0]
            if opts.bib_entry in bib_entry:
                bib_entry = opts.bib_entry
            else:
                log.warn("Incorrect --entry-type flag, revert to default")
                bib_entry = bib_entry[0]

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s(): Generating %s" % (self.name, self.fmt))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)"
                        )

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

            log(" Output file will be encoded in %s with %s flag" %
                (bibfile_enc, bibfile_enctag))

            log(" BibTeX entry type is %s with a citation like '%s' flag" %
                (bib_entry, opts_dict['bib_cit']))

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        if not len(data):
            log.error(
                "\nNo matching database entries for search criteria '%s'" %
                opts.search_text)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        if not len(data):
            log.error(
                "\nNo matching database entries for search criteria '%s'" %
                opts.search_text)

        # Initialize BibTeX class
        bibtexc = BibTeX()

        # Entries writing after Bibtex formating (or not)
        if bibfile_enc != 'ascii':
            bibtexc.ascii_bibtex = False
        else:
            bibtexc.ascii_bibtex = True

        # Check citation choice and go to default in case of bad CLI
        if isinstance(opts.impcit, string_or_bytes):
            if opts.impcit == 'False':
                citation_bibtex = False
            elif opts.impcit == 'True':
                citation_bibtex = True
            else:
                log.warn("Incorrect --create-citation, revert to default")
                citation_bibtex = True
        else:
            citation_bibtex = opts.impcit

        # Check add file entry and go to default in case of bad CLI
        if isinstance(opts.addfiles, string_or_bytes):
            if opts.addfiles == 'False':
                addfiles_bibtex = False
            elif opts.addfiles == 'True':
                addfiles_bibtex = True
            else:
                log.warn("Incorrect --add-files-path, revert to default")
                addfiles_bibtex = True
        else:
            addfiles_bibtex = opts.addfiles

        # Preprocess for error and light correction
        template_citation = preprocess_template(opts.bib_cit)

        # Open output and write entries
        with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\
            as outfile:
            # File header
            nb_entries = len(data)

            # check in book strict if all is ok else throw a warning into log
            if bib_entry == 'book':
                nb_books = len(list(filter(check_entry_book_valid, data)))
                if nb_books < nb_entries:
                    log.warn("Only %d entries in %d are book compatible" %
                             (nb_books, nb_entries))
                    nb_entries = nb_books

            # If connected device, add 'On Device' values to data
            if opts.connected_device[
                    'is_device_connected'] and 'ondevice' in fields:
                for entry in data:
                    entry[
                        'ondevice'] = db.catalog_plugin_on_device_temp_mapping[
                            entry['id']]['ondevice']

            # outfile.write('%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
            outfile.write(
                '@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                % (nb_entries, strftime("%A, %d. %B %Y %H:%M")))

            for entry in data:
                outfile.write(
                    create_bibtex_entry(entry, fields, bib_entry,
                                        template_citation, bibtexc, db,
                                        citation_bibtex, addfiles_bibtex))
Exemplo n.º 46
0
def metadata_from_xmp_packet(raw_bytes):
    """Parse an XMP packet and return the metadata it describes.

    :param raw_bytes: the XMP packet, passed unchanged to
        ``parse_xmp_packet``.
    :return: a ``Metadata`` object, created with the title ``_('Unknown')``,
        with every field that could be read from the packet filled in.
    :raises ValueError: if the title carries the marker of a corrupted
        packet generated by Nitro PDF.
    """
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError(
                'Corrupted XMP metadata packet detected, probably generated by Nitro PDF'
            )
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    # dc:subject wins; pdf:Keywords is only consulted when it yields nothing
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root)
                             or first_simple('//xmp:CreateDate', root),
                             assume_utc=False)
    except Exception:
        # Best effort: a missing or unparseable date leaves pubdate unset
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            # Ratings outside 0..10 are ignored
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        # Use the first element that yields a non-empty value
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                # Best effort: values that cannot be decoded are skipped
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    # Fill in schemes not found above from the prism and pdfx namespaces
    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types. Each validated
    # value is stored under its own scheme, so an ISBN found in
    # dc:identifier is recorded as 'isbn' and never written to 'doi'.
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
Exemplo n.º 47
0
def read_tags(root, prefixes, refines):
    """Return the de-duplicated tags listed in the OPF ``dc:subject`` elements.

    The text of every ``./opf:metadata/dc:subject`` element is split on
    commas, each piece is whitespace-normalized, empty pieces are dropped
    and the result is passed through ``uniq``. ``prefixes`` and ``refines``
    are accepted but not used here.
    """
    collected = []
    for subject in XPath('./opf:metadata/dc:subject')(root):
        text = subject.text
        if not text:
            continue
        for piece in text.split(','):
            collected.append(normalize_whitespace(piece))
    return uniq([tag for tag in collected if tag])
Exemplo n.º 48
0
def metadata_to_xmp_packet(mi):
    """Serialize the metadata object ``mi`` as an XMP packet.

    An ``x:xmpmeta``/``rdf:RDF`` tree is built with one ``rdf:Description``
    for each of the dc, xmp, prism, pdfx and calibre namespaces (in that
    order); the tree is then handed to ``serialize_xmp_packet`` and its
    result is returned.
    """
    def add_description(*prefixes):
        # Append an empty rdf:Description (rdf:about="") declaring the given
        # namespace prefixes to the RDF root and return it.
        node = rdf.makeelement(expand('rdf:Description'),
                               nsmap=nsmap(*prefixes))
        node.set(expand('rdf:about'), '')
        rdf.append(node)
        return node

    x_maker = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    rdf_maker = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = x_maker.xmpmeta(rdf_maker.RDF)
    rdf = root[0]

    # --- Dublin Core ---
    dc = add_description('dc')
    alt_props = {'title': 'dc:title', 'comments': 'dc:description'}
    for prop, tag in iteritems(alt_props):
        create_alt_property(dc, tag, mi.get(prop) or '')
    seq_props = {
        'authors': ('dc:creator', True),
        'tags': ('dc:subject', False),
        'publisher': ('dc:publisher', False),
    }
    for prop, (tag, ordered) in iteritems(seq_props):
        values = mi.get(prop) or ()
        if isinstance(values, string_or_bytes):
            # A single string is treated as a one-item sequence
            values = [values]
        create_sequence_property(dc, tag, values, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        local_pubdate = isoformat(mi.pubdate, as_utc=False)
        create_sequence_property(dc, 'dc:date', [local_pubdate])
    if not mi.is_null('languages'):
        langs = []
        for lang in mi.languages:
            code = lang_as_iso639_1(lang) or canonicalize_lang(lang)
            if code:
                langs.append(code)
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    # --- XMP basic, plus the prism/pdfx containers for isbn and doi ---
    xmp = add_description('xmp', 'xmpidq')
    extra_ids = {}
    for ns_prefix in ('prism', 'pdfx'):
        extra_ids[ns_prefix] = add_description(ns_prefix)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in iteritems(identifiers):
            if scheme not in {'isbn', 'doi'}:
                continue
            for prefix, parent in iteritems(extra_ids):
                child = parent.makeelement(expand('%s:%s' % (prefix, scheme)))
                child.text = val
                parent.append(child)

    stamp = xmp.makeelement(expand('xmp:MetadataDate'))
    stamp.text = isoformat(now(), as_utc=False)
    xmp.append(stamp)

    # --- calibre specific properties ---
    calibre = add_description('calibre', 'calibreSI', 'calibreCC')
    if not mi.is_null('rating'):
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            rating = None
        if rating is not None:
            create_simple_property(calibre, 'calibre:rating', '%g' % rating)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        local_timestamp = isoformat(mi.timestamp, as_utc=False)
        create_simple_property(calibre, 'calibre:timestamp', local_timestamp)
    for attr in ('author_link_map', 'user_categories'):
        mapping = getattr(mi, attr, None)
        if mapping:
            create_simple_property(calibre, 'calibre:' + attr,
                                   dump_dict(mapping))

    for attr in ('title_sort', 'author_sort'):
        if mi.is_null(attr):
            continue
        create_simple_property(calibre, 'calibre:' + attr, getattr(mi, attr))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
Exemplo n.º 49
0
    def create_periodical_index(self):  # {{{
        """Build the flat list of index entries for a periodical.

        The first node of ``self.oeb.toc`` is taken as the periodical, its
        children as sections and their children as articles. Nodes whose
        offset or labels cannot be looked up, and nodes that repeat an
        already seen offset, are skipped. Entries with a non-positive length
        and sections left without articles are dropped, after which indices
        and lengths are assigned.

        Returns a list holding the periodical entry, then every section
        entry, then every article entry.

        Raises ValueError if the final sections or articles have zero length
        or do not end exactly where the next entry (or the enclosing
        section/body) begins.
        """
        # next(iter(...)) works on Python 2 and 3 alike; the iterator method
        # .next() exists only on Python 2.
        periodical_node = next(iter(self.oeb.toc))
        periodical_node_offset = self.serializer.body_start_offset
        periodical_node_size = (self.serializer.body_end_offset -
                                periodical_node_offset)

        normalized_sections = []

        id_offsets = self.serializer.id_offsets

        periodical = PeriodicalIndexEntry(periodical_node_offset,
                                          self.cncx[periodical_node.title],
                                          self.cncx[periodical_node.klass], 0)
        periodical.length = periodical_node_size
        periodical.first_child_index = 1
        periodical.image_index = self.masthead_offset

        seen_sec_offsets = set()
        seen_art_offsets = set()

        for sec in periodical_node:
            normalized_articles = []
            try:
                offset = id_offsets[sec.href]
                label = self.cncx[sec.title]
                klass = self.cncx[sec.klass]
            except Exception:
                # Sections that cannot be resolved are left out of the index
                continue
            if offset in seen_sec_offsets:
                continue

            seen_sec_offsets.add(offset)
            section = PeriodicalIndexEntry(offset, label, klass, 1)
            section.parent_index = 0

            for art in sec:
                try:
                    offset = id_offsets[art.href]
                    label = self.cncx[art.title]
                    klass = self.cncx[art.klass]
                except Exception:
                    # Articles that cannot be resolved are left out too
                    continue
                if offset in seen_art_offsets:
                    continue
                seen_art_offsets.add(offset)
                article = PeriodicalIndexEntry(offset, label, klass, 2)
                normalized_articles.append(article)
                article.author_offset = self.cncx[art.author]
                article.desc_offset = self.cncx[art.description]
                if getattr(art, 'toc_thumbnail', None) is not None:
                    try:
                        ii = self.serializer.images[art.toc_thumbnail] - 1
                        if ii > -1:
                            article.image_index = ii
                    except KeyError:
                        pass  # Image not found in serializer

            if normalized_articles:
                normalized_articles.sort(key=lambda x: x.offset)
                normalized_sections.append((section, normalized_articles))

        normalized_sections.sort(key=lambda x: x[0].offset)

        # Set lengths: each entry runs up to the start of the next one; the
        # last section ends at the body end, the last article at the end of
        # its section.
        for s, x in enumerate(normalized_sections):
            sec, normalized_articles = x
            try:
                sec.length = normalized_sections[s + 1][0].offset - sec.offset
            except IndexError:
                sec.length = self.serializer.body_end_offset - sec.offset
            for i, art in enumerate(normalized_articles):
                try:
                    art.length = normalized_articles[i + 1].offset - art.offset
                except IndexError:
                    art.length = sec.offset + sec.length - art.offset

        # Filter out articles with no length, then sections with no length
        # or with no remaining articles
        normalized_sections = [
            (sec, [art for art in articles if art.length > 0])
            for sec, articles in normalized_sections
        ]
        normalized_sections = [
            x for x in normalized_sections if x[0].length > 0 and x[1]
        ]

        # Set indices: sections are numbered first (starting at 1), the
        # articles continue the same counter afterwards
        i = 0
        for sec, articles in normalized_sections:
            i += 1
            sec.index = i
            sec.parent_index = 0

        for sec, articles in normalized_sections:
            for art in articles:
                i += 1
                art.index = i

                art.parent_index = sec.index

        for sec, normalized_articles in normalized_sections:
            sec.first_child_index = normalized_articles[0].index
            sec.last_child_index = normalized_articles[-1].index

        # Set lengths again to close up any gaps left by filtering
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_offset = normalized_sections[s + 1][0].offset
            except IndexError:
                next_offset = self.serializer.body_end_offset
            sec.length = next_offset - sec.offset

            for a, art in enumerate(articles):
                try:
                    next_offset = articles[a + 1].offset
                except IndexError:
                    # presumably next_offset is offset + length on the entry
                    # class -- TODO confirm against PeriodicalIndexEntry
                    next_offset = sec.next_offset
                art.length = next_offset - art.offset

        # Sanity check
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_sec = normalized_sections[s + 1][0]
            except IndexError:
                if (sec.length == 0
                        or sec.next_offset != self.serializer.body_end_offset):
                    raise ValueError('Invalid section layout')
            else:
                if next_sec.offset != sec.next_offset or sec.length == 0:
                    raise ValueError('Invalid section layout')
            for a, art in enumerate(articles):
                try:
                    next_art = articles[a + 1]
                except IndexError:
                    if (art.length == 0 or art.next_offset != sec.next_offset):
                        raise ValueError('Invalid article layout')
                else:
                    if art.length == 0 or art.next_offset != next_art.offset:
                        raise ValueError('Invalid article layout')

        # Flatten
        indices = [periodical]
        for sec, articles in normalized_sections:
            indices.append(sec)
            periodical.last_child_index = sec.index

        for sec, articles in normalized_sections:
            for a in articles:
                indices.append(a)

        return indices
Exemplo n.º 50
0
def mi_to_html(mi,
               field_list=None,
               default_author_link=None,
               use_roman_numbers=True,
               rating_font='Liberation Serif',
               rtl=False):
    """Render the metadata in ``mi`` as an HTML table plus comment blocks.

    :param mi: the metadata object to render. When it has no ``id``
        attribute it is treated as a book on a device.
    :param field_list: iterable of ``(field, display)`` pairs; only fields
        whose ``display`` is truthy are rendered. Defaults to
        ``get_field_list(mi)``.
    :param default_author_link: link used for authors that have no entry in
        ``mi.author_link_map``. A value starting with ``search-`` is resolved
        through ``author_search_href``; anything else is formatted as a
        template with ``author``, ``title`` and ``author_sort`` values.
    :param use_roman_numbers: passed to ``fmt_sidx`` as ``use_roman`` when
        formatting series indices.
    :param rating_font: font family written into the style of rating cells.
    :param rtl: when true the table direction is ``rtl`` and the auto margin
        is placed on the left, otherwise ``ltr`` with the margin on the right.
    :return: a 2-tuple ``(table_html, comment_fields)`` where
        ``comment_fields`` is the list of ``<div class="comments">`` blocks
        that were not placed inside the table.
    """
    if field_list is None:
        field_list = get_field_list(mi)
    # (field, row_html) pairs, in display order; wrapped in <tr> at the end
    ans = []
    comment_fields = []
    isdevice = not hasattr(mi, 'id')
    row = u'<td class="title">%s</td><td class="value">%s</td>'
    # p escapes text content, a escapes attribute values
    p = prepare_string_for_xml
    a = partial(prepare_string_for_xml, attribute=True)
    book_id = getattr(mi, 'id', 0)

    for field in (field for field, display in field_list if display):
        try:
            metadata = mi.metadata_for_field(field)
        except:
            continue
        if not metadata:
            continue
        if field == 'sort':
            field = 'title_sort'
        # Custom bool/int/float columns are only null when the value is None
        # (so False and 0 are still displayed)
        if metadata['is_custom'] and metadata['datatype'] in {
                'bool', 'int', 'float'
        }:
            isnull = mi.get(field) is None
        else:
            isnull = mi.is_null(field)
        if isnull:
            continue
        name = metadata['name']
        if not name:
            name = field
        name += ':'
        disp = metadata['display']
        if metadata['datatype'] == 'comments' or field == 'comments':
            val = getattr(mi, field)
            if val:
                # How the comment text is interpreted; defaults to 'html'
                ctype = disp.get('interpret_as') or 'html'
                val = force_unicode(val)
                if ctype == 'long-text':
                    val = '<pre style="white-space:pre-wrap">%s</pre>' % p(val)
                elif ctype == 'short-text':
                    val = '<span>%s</span>' % p(val)
                elif ctype == 'markdown':
                    val = markdown(val)
                else:
                    val = comments_to_html(val)
                # 'side' puts the comment in the table; otherwise it goes to
                # comment_fields, with a heading when the position is 'above'
                if disp.get('heading_position', 'hide') == 'side':
                    ans.append((field, row % (name, val)))
                else:
                    if disp.get('heading_position', 'hide') == 'above':
                        val = '<h3 class="comments-heading">%s</h3>%s' % (
                            p(name), val)
                    comment_fields.append(
                        '<div id="%s" class="comments">%s</div>' %
                        (field.replace('#', '_'), val))
        elif metadata['datatype'] == 'rating':
            val = getattr(mi, field)
            if val:
                star_string = rating_to_stars(
                    val, disp.get('allow_half_stars', False))
                ans.append(
                    (field,
                     u'<td class="title">%s</td><td class="rating value" '
                     'style=\'font-family:"%s"\'>%s</td>' %
                     (name, rating_font, star_string)))
        elif metadata['datatype'] == 'composite':
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                if disp.get('contains_html', False):
                    ans.append((field, row % (name, comments_to_html(val))))
                else:
                    # Plain composite values become search links, one per
                    # item when the column holds multiple values
                    if not metadata['is_multiple']:
                        val = '<a href="%s" title="%s">%s</a>' % (
                            search_action(field, val),
                            _('Click to see books with {0}: {1}').format(
                                metadata['name'], a(val)), p(val))
                    else:
                        all_vals = [
                            v.strip() for v in val.split(
                                metadata['is_multiple']['list_to_ui'])
                            if v.strip()
                        ]
                        links = [
                            '<a href="%s" title="%s">%s</a>' %
                            (search_action(field, x),
                             _('Click to see books with {0}: {1}').format(
                                 metadata['name'], a(x)), p(x))
                            for x in all_vals
                        ]
                        val = metadata['is_multiple']['list_to_ui'].join(links)
                    ans.append((field, row % (name, val)))
        elif field == 'path':
            if mi.path:
                path = force_unicode(mi.path, filesystem_encoding)
                # Device books link by path, library books by book id
                scheme = u'devpath' if isdevice else u'path'
                loc = path if isdevice else book_id
                pathstr = _('Click to open')
                extra = ''
                if isdevice:
                    durl = path
                    # For mtp::: paths, drop the first two ':::' separated
                    # parts before showing the path
                    if durl.startswith('mtp:::'):
                        durl = ':::'.join((durl.split(':::'))[2:])
                    extra = '<br><span style="font-size:smaller">%s</span>' % (
                        prepare_string_for_xml(durl))
                link = '<a href="%s" title="%s">%s</a>%s' % (action(
                    scheme, loc=loc), prepare_string_for_xml(
                        path, True), pathstr, extra)
                ans.append((field, row % (name, link)))
        elif field == 'formats':
            # Formats are not listed for device books
            if isdevice:
                continue
            path = mi.path or ''
            bpath = ''
            if path:
                # bpath keeps only the last two components of the path
                h, t = os.path.split(path)
                bpath = os.sep.join((os.path.basename(h), t))
            # One dict of template values per format (generator, consumed
            # once by the list comprehension below)
            data = ({
                'fmt':
                x,
                'path':
                a(path or ''),
                'fname':
                a(mi.format_files.get(x, '')),
                'ext':
                x.lower(),
                'id':
                book_id,
                'bpath':
                bpath,
                'sep':
                os.sep,
                'action':
                action('format',
                       book_id=book_id,
                       fmt=x,
                       path=path or '',
                       fname=mi.format_files.get(x, ''))
            } for x in mi.formats)
            fmts = [
                '<a title="{bpath}{sep}{fname}.{ext}" href="{action}">{fmt}</a>'
                .format(**x) for x in data
            ]
            ans.append((field, row % (name, ', '.join(fmts))))
        elif field == 'identifiers':
            urls = urls_from_identifiers(mi.identifiers)
            links = [
                '<a href="%s" title="%s:%s">%s</a>' %
                (action('identifier',
                        url=url,
                        name=namel,
                        id_type=id_typ,
                        value=id_val,
                        field='identifiers',
                        book_id=book_id), a(id_typ), a(id_val), p(namel))
                for namel, id_typ, id_val, url in urls
            ]
            links = u', '.join(links)
            # The row is only emitted when at least one link was produced
            if links:
                ans.append((field, row % (_('Ids') + ':', links)))
        elif field == 'authors':
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                # A per-author link from author_link_map takes precedence
                # over default_author_link
                if mi.author_link_map.get(aut):
                    link = lt = mi.author_link_map[aut]
                elif default_author_link:
                    if isdevice and default_author_link == 'search-calibre':
                        default_author_link = DEFAULT_AUTHOR_LINK
                    if default_author_link.startswith('search-'):
                        which_src = default_author_link.partition('-')[2]
                        link, lt = author_search_href(which_src,
                                                      title=mi.title,
                                                      author=aut)
                    else:
                        vals = {
                            'author': qquote(aut),
                            'title': qquote(mi.title)
                        }
                        # Fall back to the author name when no sort value
                        # is recorded for this author
                        try:
                            vals['author_sort'] = qquote(
                                mi.author_sort_map[aut])
                        except KeyError:
                            vals['author_sort'] = qquote(aut)
                        link = lt = formatter.safe_format(
                            default_author_link, vals, '', vals)
                aut = p(aut)
                if link:
                    authors.append(
                        '<a title="%s" href="%s">%s</a>' %
                        (a(lt), action('author', url=link, name=aut,
                                       title=lt), aut))
                else:
                    authors.append(aut)
            ans.append((field, row % (name, ' & '.join(authors))))
        elif field == 'languages':
            if not mi.languages:
                continue
            # Language codes without a name are dropped
            names = filter(None, map(calibre_langcode_to_name, mi.languages))
            names = [
                '<a href="%s" title="%s">%s</a>' %
                (search_action('languages', n),
                 _('Search calibre for books with the language: {}').format(n),
                 n) for n in names
            ]
            ans.append((field, row % (name, u', '.join(names))))
        elif field == 'publisher':
            if not mi.publisher:
                continue
            val = '<a href="%s" title="%s">%s</a>' % (search_action_with_data(
                'publisher', mi.publisher,
                book_id), _('Click to see books with {0}: {1}').format(
                    metadata['name'], a(mi.publisher)), p(mi.publisher))
            ans.append((field, row % (name, val)))
        elif field == 'title':
            # otherwise title gets metadata['datatype'] == 'text'
            # treatment below with a click to search link (which isn't
            # too bad), and a right-click 'Delete' option to delete
            # the title (which is bad).
            # NOTE(review): unlike the generic branch below, the formatted
            # title is inserted without being passed through p() -- confirm
            # that this is intended.
            val = mi.format_field(field)[-1]
            ans.append((field, row % (name, val)))
        else:
            # Generic handling, refined per datatype below
            val = mi.format_field(field)[-1]
            if val is None:
                continue
            val = p(val)
            if metadata['datatype'] == 'series':
                sidx = mi.get(field + '_index')
                if sidx is None:
                    sidx = 1.0
                # Search on the first search term, or the field name if the
                # metadata provides none
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                series = getattr(mi, field)
                val = _('%(sidx)s of <a href="%(href)s" title="%(tt)s">'
                        '<span class="%(cls)s">%(series)s</span></a>') % dict(
                            sidx=fmt_sidx(sidx, use_roman=use_roman_numbers),
                            cls="series_name",
                            series=p(series),
                            href=search_action_with_data(
                                st, series, book_id, field),
                            tt=p(_('Click to see books in this series')))
            elif metadata['datatype'] == 'datetime':
                # Undefined dates are not displayed
                aval = getattr(mi, field)
                if is_date_undefined(aval):
                    continue
            elif metadata['datatype'] == 'text' and metadata['is_multiple']:
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                all_vals = mi.get(field)
                # Values are sorted unless the column is flagged as holding
                # names
                if not metadata.get('display', {}).get('is_names', False):
                    all_vals = sorted(all_vals, key=sort_key)
                links = [
                    '<a href="%s" title="%s">%s</a>' %
                    (search_action_with_data(st, x, book_id, field),
                     _('Click to see books with {0}: {1}').format(
                         metadata['name'], a(x)), p(x)) for x in all_vals
                ]
                val = metadata['is_multiple']['list_to_ui'].join(links)
            elif metadata['datatype'] == 'text' or metadata[
                    'datatype'] == 'enumeration':
                # text/is_multiple handled above so no need to add the test to the if
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                # NOTE(review): val was already passed through p() at the top
                # of this branch, so p(val) below escapes it a second time and
                # the search action receives the escaped text -- confirm that
                # this is intended.
                val = '<a href="%s" title="%s">%s</a>' % (
                    search_action_with_data(st, val, book_id, field),
                    a(
                        _('Click to see books with {0}: {1}').format(
                            metadata['name'], val)), p(val))

            ans.append((field, row % (name, val)))

    # Device collections, when present, get their own trailing row
    dc = getattr(mi, 'device_collections', [])
    if dc:
        dc = u', '.join(sorted(dc, key=sort_key))
        ans.append(('device_collections', row % (_('Collections') + ':', dc)))

    def classname(field):
        # CSS class derived from the field's datatype; 'text' is used when
        # the datatype cannot be looked up
        try:
            dt = mi.metadata_for_field(field)['datatype']
        except:
            dt = 'text'
        return 'datatype_%s' % dt

    # Wrap every collected row in a <tr>; '#' in field names is replaced
    # with '_' for the element id
    ans = [
        u'<tr id="%s" class="%s">%s</tr>' %
        (fieldl.replace('#', '_'), classname(fieldl), html)
        for fieldl, html in ans
    ]
    # print '\n'.join(ans)
    direction = 'rtl' if rtl else 'ltr'
    margin = 'left' if rtl else 'right'
    # The result is a 2-tuple: (style + table markup, comment_fields)
    return u'<style>table.fields td { vertical-align:top}</style>' + \
           u'<table class="fields" style="direction: %s; margin-%s:auto">%s</table>'%(
               direction, margin, u'\n'.join(ans)), comment_fields
Exemplo n.º 51
0
    def fb2_header(self):
        '''
        Build the opening ``<FictionBook>`` tag and the ``<description>``
        section of the FB2 output from the metadata of ``self.oeb_book``.

        Title, language, authors, keywords, series, identifiers, date,
        publisher and description are read from the book metadata, the cover
        markup comes from ``self.get_cover()`` and the genre from
        ``self.opts.fb2_genre``. When no UUID identifier is found a warning
        is logged and a random UUID is generated.

        Returns the header as a string with blank lines removed.
        '''
        from calibre.ebooks.oeb.base import OPF

        def scheme_of(identifier):
            # The lookup falls back to None when the attribute is missing, so
            # normalise that to '' before lower-casing instead of crashing
            # with an AttributeError.
            return (identifier.get(OPF('scheme'), None) or '').lower()

        book_meta = self.oeb_book.metadata
        metadata = {}
        metadata['title'] = book_meta.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
        # Read the clock once so that day, month and year all come from the
        # same instant.
        now = datetime.now()
        metadata['date'] = '%i.%i.%i' % (now.day, now.month, now.year)
        if book_meta.language:
            lc = lang_as_iso639_1(book_meta.language[0].value)
            if not lc:
                lc = book_meta.language[0].value
            metadata['lang'] = lc or 'en'
        else:
            metadata['lang'] = u'en'
        metadata['id'] = None
        metadata['cover'] = self.get_cover()
        metadata['genre'] = self.opts.fb2_genre

        # One <author> element per creator. A single word is treated as the
        # last name, two words as first + last, anything longer puts the
        # inner words into <middle-name>.
        metadata['author'] = ''
        for auth in book_meta.creator:
            author_first = ''
            author_middle = ''
            author_last = ''
            author_parts = auth.value.split(' ')
            if len(author_parts) == 1:
                author_last = author_parts[0]
            elif len(author_parts) == 2:
                author_first = author_parts[0]
                author_last = author_parts[1]
            else:
                author_first = author_parts[0]
                author_middle = ' '.join(author_parts[1:-1])
                author_last = author_parts[-1]
            metadata['author'] += '<author>'
            metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(
                author_first)
            if author_middle:
                metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(
                    author_middle)
            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(
                author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
            metadata['author'] = '<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = ''
        tags = list(map(unicode_type, book_meta.subject))
        if tags:
            tags = ', '.join(prepare_string_for_xml(x) for x in tags)
            metadata['keywords'] = '<keywords>%s</keywords>' % tags

        metadata['sequence'] = ''
        if book_meta.series:
            index = '1'
            if book_meta.series_index:
                index = book_meta.series_index[0]
            metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
                prepare_string_for_xml('%s' % book_meta.series[0]), index)

        year = publisher = isbn = ''
        identifiers = book_meta['identifier']
        for x in identifiers:
            if scheme_of(x) == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
                metadata['id'] = unicode_type(x).split(':')[-1]
                break
        if metadata['id'] is None:
            self.log.warn('No UUID identifier found')
            metadata['id'] = unicode_type(uuid.uuid4())

        try:
            date = book_meta['date'][0]
        except IndexError:
            pass
        else:
            # Only the part before the first '-' of the date is emitted.
            year = '<year>%s</year>' % prepare_string_for_xml(
                date.value.partition('-')[0])

        try:
            publisher = book_meta['publisher'][0]
        except IndexError:
            pass
        else:
            publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(
                publisher.value)

        for x in identifiers:
            if scheme_of(x) == 'isbn':
                isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

        metadata['year'], metadata['isbn'], metadata[
            'publisher'] = year, isbn, publisher
        # These entries already hold finished XML fragments; every other
        # value is plain text and still has to be escaped.
        preformatted = ('author', 'cover', 'sequence', 'keywords', 'year',
                        'publisher', 'isbn')
        for key, value in metadata.items():
            if key not in preformatted:
                metadata[key] = prepare_string_for_xml(value)

        try:
            comments = book_meta['description'][0]
        except Exception:
            metadata['comments'] = ''
        else:
            from calibre.utils.html2text import html2text
            metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
                prepare_string_for_xml(html2text(comments.value).strip()))

        # Keep the indentation level of the description the same as the body.
        header = textwrap.dedent('''\
            <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
            <description>
                <title-info>
                    <genre>%(genre)s</genre>
                    %(author)s
                    <book-title>%(title)s</book-title>
                    %(cover)s
                    <lang>%(lang)s</lang>
                    %(keywords)s
                    %(sequence)s
                    %(comments)s
                </title-info>
                <document-info>
                    %(author)s
                    <program-used>%(appname)s %(version)s</program-used>
                    <date>%(date)s</date>
                    <id>%(id)s</id>
                    <version>1.0</version>
                </document-info>
                <publish-info>
                    %(publisher)s
                    %(year)s
                    %(isbn)s
                </publish-info>
            </description>''') % metadata

        # Remove empty lines.
        return '\n'.join(filter(unicode_type.strip, header.splitlines()))
Exemplo n.º 52
0
def run_file_dialog(
        parent=None, title=None, initial_folder=None, filename=None, save_path=None,
        allow_multiple=False, only_dirs=False, confirm_overwrite=True, save_as=False, no_symlinks=False,
        file_types=(), default_ext=None, app_uid=None
):
    '''
    Show a file dialog by running the external ``HELPER`` program and return
    the chosen paths.

    The requested options are serialized into ``data`` and handed to a
    ``Helper`` together with the helper process. The selection is read back
    through a ``PipeServer`` listening on a freshly named pipe. While the
    helper runs, a local Qt event loop is executed with user input events
    excluded.

    :param parent: If truthy, its window handle (``get_hwnd(parent)``) is
        sent to the helper.
    :param title: Optional dialog title.
    :param initial_folder: Sent as ``FOLDER`` when it is an existing directory.
    :param filename: Sent as ``FILENAME``; bytes are decoded with
        ``os.fsdecode``.
    :param save_path: Only used when ``save_as`` is set. Sent as ``SAVE_PATH``
        if it exists, otherwise used to fill in a missing ``initial_folder``
        and ``filename``.
    :param allow_multiple: Only used when ``save_as`` is not set.
    :param only_dirs: Only used when ``save_as`` is not set; also clears
        ``file_types``.
    :param confirm_overwrite: Only used when ``save_as`` is set.
    :param file_types: Defaults to a single "All files" entry when empty and
        ``only_dirs`` is not set.
    :param default_ext: Optional default extension.
    :param app_uid: Falls back to ``current_app_uid``.
    :return: A tuple of paths, or ``()`` when the pipe delivered no data or
        fewer than two fields.
    :raises Exception: If the helper exits with a non-zero return code, the
        pipe read does not finish within 2 seconds, the pipe server reports
        an error, or the echoed secret does not match.
    '''
    from calibre.gui2 import sanitize_env_vars
    # The reply is split on NUL bytes further down and its first field is
    # compared with this secret, so the secret itself must not contain a NUL.
    secret = os.urandom(32).replace(b'\0', b' ')
    pipename = '\\\\.\\pipe\\%s' % uuid4()
    data = [serialize_string('PIPENAME', pipename), serialize_secret(secret)]
    # Normalise any falsy parent to None
    parent = parent or None
    if parent is not None:
        data.append(serialize_hwnd(get_hwnd(parent)))
    if title:
        data.append(serialize_string('TITLE', title))
    if no_symlinks:
        data.append(serialize_binary('NO_SYMLINKS', no_symlinks))
    if save_as:
        data.append(serialize_binary('SAVE_AS', save_as))
        if confirm_overwrite:
            data.append(serialize_binary('CONFIRM_OVERWRITE', confirm_overwrite))
        if save_path is not None:
            save_path = process_path(save_path)
            if os.path.exists(save_path):
                data.append(serialize_string('SAVE_PATH', save_path))
            else:
                # The target does not exist yet: derive the starting folder
                # and suggested file name from it, unless the caller already
                # supplied them.
                if not initial_folder:
                    initial_folder = select_initial_dir(save_path)
                if not filename:
                    filename = os.path.basename(save_path)
    else:
        if allow_multiple:
            data.append(serialize_binary('MULTISELECT', allow_multiple))
        if only_dirs:
            data.append(serialize_binary('ONLY_DIRS', only_dirs))
    if initial_folder is not None:
        initial_folder = process_path(initial_folder)
        # A folder that is not an existing directory is silently dropped
        if os.path.isdir(initial_folder):
            data.append(serialize_string('FOLDER', initial_folder))
    if filename:
        if isinstance(filename, bytes):
            filename = os.fsdecode(filename)
        data.append(serialize_string('FILENAME', filename))
    if only_dirs:
        file_types = ()  # file types not allowed for dir only dialogs
    elif not file_types:
        file_types = [(_('All files'), ('*',))]
    if file_types:
        data.append(serialize_file_types(file_types))
    if default_ext:
        data.append(serialize_string('DEFAULT_EXTENSION', default_ext))
    app_uid = app_uid or current_app_uid
    if app_uid:
        data.append(serialize_string('APP_UID', app_uid))

    from PyQt5.Qt import QEventLoop, Qt, pyqtSignal

    class Loop(QEventLoop):
        # Event loop that exits when dialog_closed is emitted. The connection
        # is queued, so the exit is delivered through the event queue.

        dialog_closed = pyqtSignal()

        def __init__(self):
            QEventLoop.__init__(self)
            self.dialog_closed.connect(self.exit, type=Qt.ConnectionType.QueuedConnection)

    loop = Loop()
    # Start listening on the pipe before the helper process is launched
    server = PipeServer(pipename)
    server.start()
    with sanitize_env_vars():
        # The Helper receives the process, the serialized request and the
        # signal emitter that ends the local event loop.
        # NOTE(review): presumably the emitter is called once the helper
        # process has finished; confirm against Helper.
        h = Helper(subprocess.Popen(
            [HELPER], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE),
               data, loop.dialog_closed.emit)
    h.start()
    # Block here, without processing user input events, until the loop exits
    loop.exec_(QEventLoop.ProcessEventsFlag.ExcludeUserInputEvents)

    def decode(x):
        # Best-effort conversion of helper output to text for error messages
        x = x or b''
        try:
            x = x.decode('utf-8')
        except Exception:
            x = repr(x)
        return x

    def get_errors():
        return decode(h.stdoutdata) + ' ' + decode(h.stderrdata)
    from calibre import prints
    from calibre.constants import DEBUG
    if DEBUG:
        prints('stdout+stderr from file dialog helper:', unicode_type([h.stdoutdata, h.stderrdata]))

    if h.rc != 0:
        raise Exception('File dialog failed (return code %s): %s' % (h.rc, get_errors()))
    # Give the pipe reader up to two seconds to finish
    server.join(2)
    if server.is_alive():
        raise Exception('Timed out waiting for read from pipe to complete')
    if server.err_msg:
        raise Exception(server.err_msg)
    if not server.data:
        return ()
    # The reply is NUL separated: the secret first, then the selected paths
    parts = list(filter(None, server.data.split(b'\0')))
    if DEBUG:
        prints('piped data from file dialog helper:', unicode_type(parts))
    if len(parts) < 2:
        return ()
    if parts[0] != secret:
        raise Exception('File dialog failed, incorrect secret received: ' + get_errors())

    from calibre_extensions.winutil import get_long_path_name

    def fix_path(x):
        # Decode to an absolute path and try to expand it with
        # get_long_path_name. If the file itself does not exist, expand only
        # its directory. Any other failure falls back to the plain absolute
        # path.
        u = os.path.abspath(x.decode('utf-8'))
        with suppress(Exception):
            try:
                return get_long_path_name(u)
            except FileNotFoundError:
                base, fn = os.path.split(u)
                return os.path.join(get_long_path_name(base), fn)
        return u

    ans = tuple(map(fix_path, parts[1:]))
    return ans
Exemplo n.º 53
0
def get_words(text, lang):
    '''
    Split ``text`` into words for ``lang`` and keep only the words accepted
    by ``filter_words``.

    Returns a list of words, or an empty tuple when splitting raises
    TypeError or ValueError.
    '''
    try:
        candidates = split_into_words(unicode_type(text), lang)
    except (TypeError, ValueError):
        return ()
    return [word for word in candidates if filter_words(word)]
Exemplo n.º 54
0
def get_name_components(name):
    '''
    Split a dotted ``name`` into its components.

    The split is only used when re-joining the stripped, non-empty pieces
    with '.' reproduces ``name`` exactly. Otherwise (empty pieces, stray
    whitespace, empty string) ``name`` is returned as a single component.
    '''
    stripped = (piece.strip() for piece in name.split('.'))
    pieces = [piece for piece in stripped if piece]
    if pieces and '.'.join(pieces) == name:
        return pieces
    return [name]
Exemplo n.º 55
0
    def format_field_extended(self, key, series_with_index=True):
        '''
        returns the tuple (display_name, formatted_value, original_value,
        field_metadata)

        ``key`` may be a custom column key, a custom series index key
        (``#name_index``), one of ``TOP_LEVEL_IDENTIFIERS`` or a standard
        field name / search-term alias. When ``series_with_index`` is true
        the series index is appended to series values as `` [index]``.

        For an empty field (``None`` or ``''``) the result is
        ``(display_name, value, None, None)``. For an unknown key it is
        ``(None, None, None, None)``.
        '''
        from calibre.ebooks.metadata import authors_to_string
        from calibre.utils.date import format_date

        # Handle custom series index
        if key.startswith('#') and key.endswith('_index'):
            tkey = key[:-6]  # strip the _index
            cmeta = self.get_user_metadata(tkey, make_copy=False)
            if cmeta and cmeta['datatype'] == 'series':
                if self.get(tkey):
                    res = self.get_extra(tkey)
                    return (unicode_type(cmeta['name']+'_index'),
                            self.format_series_index(res), res, cmeta)
                else:
                    return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)

        if key in self.custom_field_keys():
            res = self.get(key, None)       # get evaluates all necessary composites
            cmeta = self.get_user_metadata(key, make_copy=False)
            name = unicode_type(cmeta['name'])
            if res is None or res == '':    # can't check "not res" because of numeric fields
                return (name, res, None, None)
            orig_res = res
            datatype = cmeta['datatype']
            if datatype == 'text' and cmeta['is_multiple']:
                res = cmeta['is_multiple']['list_to_ui'].join(res)
            elif datatype == 'series' and series_with_index:
                if self.get_extra(key) is not None:
                    res = res + \
                        ' [%s]'%self.format_series_index(val=self.get_extra(key))
            elif datatype == 'datetime':
                res = format_date(res, cmeta['display'].get('date_format','dd MMM yyyy'))
            elif datatype == 'bool':
                res = _('Yes') if res else _('No')
            elif datatype == 'rating':
                res = '%.2g'%(res/2)
            elif datatype in ['int', 'float']:
                # Best effort: a missing or invalid number_format leaves the
                # number unformatted.
                try:
                    fmt = cmeta['display'].get('number_format', None)
                    res = fmt.format(res)
                except Exception:
                    pass
            return (name, unicode_type(res), orig_res, cmeta)

        # convert top-level ids into their value
        if key in TOP_LEVEL_IDENTIFIERS:
            fmeta = field_metadata['identifiers']
            name = key
            res = self.get(key, None)
            return (name, res, res, fmeta)

        # Translate aliases into the standard field name
        fmkey = field_metadata.search_term_to_field_key(key)
        if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
            res = self.get(key, None)
            fmeta = field_metadata[fmkey]
            name = unicode_type(fmeta['name'])
            if res is None or res == '':
                return (name, res, None, None)
            orig_res = res
            datatype = fmeta['datatype']
            if key == 'authors':
                res = authors_to_string(res)
            elif key == 'series_index':
                res = self.format_series_index(res)
            elif datatype == 'text' and fmeta['is_multiple']:
                if isinstance(res, dict):
                    res = [k + ':' + v for k,v in res.items()]
                res = fmeta['is_multiple']['list_to_ui'].join(sorted(filter(None, res), key=sort_key))
            elif datatype == 'series' and series_with_index:
                res = res + ' [%s]'%self.format_series_index()
            elif datatype == 'datetime':
                res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
            elif datatype == 'rating':
                res = '%.2g'%(res/2)
            elif key == 'size':
                res = human_readable(res)
            return (name, unicode_type(res), orig_res, fmeta)

        return (None, None, None, None)
Exemplo n.º 56
0
 def add_hrefs(self, data):
     # Store the non-empty, '|' separated entries of the UTF-8 encoded data
     # in self.hrefs. Empty entries are dropped because kindlegen inserts a
     # trailing | after the last href.
     decoded = data.decode('utf-8')
     self.hrefs = [href for href in decoded.split('|') if href]
Exemplo n.º 57
0
def ACQUISITION_ENTRY(book_id, updated, request_context):
    '''
    Build the feed ``entry`` element describing one book.

    The entry carries title, authors, id, last-modified and timestamp, an
    optional publication date, an xhtml ``content`` block (rating, tags,
    series, displayable extra fields, comments), one acquisition link per
    format with a known MIME type, and four cover/thumbnail image links.

    ``updated`` is accepted but not used by this function.
    '''
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)

    # HTML fragments that end up in the entry's xhtml content
    snippets = []
    if (mi.rating or 0) > 0:
        stars = rating_to_stars(mi.rating)
        snippets.append(_('RATING: %s<br />') % stars)
    if mi.tags:
        snippets.append(
            _('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        series_template = _('SERIES: %(series)s [%(sidx)s]<br />')
        snippets.append(series_template % dict(
            series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index))))

    is_displayable = request_context.ctx.is_field_displayable
    for key in field_metadata.ignorable_field_keys():
        if not is_displayable(key):
            continue
        name, val = mi.format_field(key)
        if not val:
            continue
        fm = field_metadata[key]
        datatype = fm['datatype']
        if datatype == 'text' and fm['is_multiple']:
            separators = fm['is_multiple']
            snippets.append('%s: %s<br />' % (
                xml(name),
                xml(format_tag_string(
                    val, separators['ui_to_list'],
                    joinval=separators['list_to_ui']))))
        elif datatype == 'comments' or (
                fm['datatype'] == 'composite' and
                fm['display'].get('contains_html', False)):
            # These values are HTML already, so they are not xml() escaped
            snippets.append('%s: %s<br />' % (
                xml(name), comments_to_html(unicode_type(val))))
        else:
            snippets.append('%s: %s<br />' % (
                xml(name), xml(unicode_type(val))))
    if mi.comments:
        snippets.append(comments_to_html(mi.comments))

    # Either the parsed markup or, when nothing was collected, the empty list
    extra = html_to_lxml('\n'.join(snippets)) if snippets else snippets

    ans = E.entry(
        TITLE(mi.title),
        E.author(E.name(authors_to_string(mi.authors))),
        ID('urn:uuid:' + mi.uuid),
        UPDATED(mi.last_modified),
        E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        ans[-1].text = mi.pubdate.isoformat()
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))

    get = partial(
        request_context.ctx.url_for, '/get',
        book_id=book_id, library_id=request_context.library_id)

    if mi.formats:
        format_info = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.' + fmt)[0]
            if not mt:
                # Formats without a known MIME type get no link
                continue
            link = E.link(
                type=mt, href=get(what=fmt),
                rel="http://opds-spec.org/acquisition")
            ffm = format_info.get(fmt.upper())
            if ffm:
                link.set('length', unicode_type(ffm['size']))
                link.set('mtime', ffm['mtime'].isoformat())
            ans.append(link)

    image_links = (
        ('cover', "http://opds-spec.org/cover"),
        ('thumb', "http://opds-spec.org/thumbnail"),
        ('cover', "http://opds-spec.org/image"),
        ('thumb', "http://opds-spec.org/image/thumbnail"),
    )
    for what, rel in image_links:
        ans.append(E.link(type='image/jpeg', href=get(what=what), rel=rel))

    return ans
Exemplo n.º 58
0
    def epubify_markup(self, root, log):
        '''
        Adjust the XHTML tree ``root`` in place so that it works better in
        EPUB renderers:

          * empty ``<title>`` elements get a single space as text
          * a ``<p>`` that directly contains a ``<div>`` is turned into a
            ``<div>``
          * a styled image that is the only child of a div gets a fixed
            max-size style and its div gets ``position:static``
          * images wrapped in two single-child divs get an explicit
            ``text-align`` on the outer div (derived from auto side margins
            when no class rule sets one) and ``display:inline`` on the
            inner div

        ``log`` is accepted but not used by this method.
        '''
        from calibre.ebooks.oeb.base import XPath, XHTML
        # Fix empty title tags
        for t in XPath('//h:title')(root):
            if not t.text:
                t.text = u' '
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
        pdiv = XPath('//h:p/h:div')
        for div in pdiv(root):
            div.getparent().tag = XHTML('div')

        # Remove the position:relative as it causes problems with some epub
        # renderers. Remove display: block on an image inside a div as it is
        # redundant and prevents text-align:center from working in ADE
        # Also ensure that the img is contained in its containing div
        imgpath = XPath('//h:div/h:img[@style]')
        for img in imgpath(root):
            div = img.getparent()
            if len(div) == 1:
                style = div.attrib.get('style', '')
                if style and not style.endswith(';'):
                    style = style + ';'
                style += 'position:static'  # Ensures position of containing div is static
                # Ensure that the img is always contained in its frame
                div.attrib['style'] = style
                img.attrib['style'] = 'max-width: 100%; max-height: 100%'

        # Handle anchored images. The default markup + CSS produced by
        # odf2xhtml works with WebKit but not with ADE. So we convert the
        # common cases of left/right/center aligned block images to work on
        # both webkit and ADE. We detect the case of setting the side margins
        # to auto and map it to an appropriate text-align directive, which
        # works in both WebKit and ADE.
        # https://bugs.launchpad.net/bugs/1063207
        # https://bugs.launchpad.net/calibre/+bug/859343
        imgpath = XPath('descendant::h:div/h:div/h:img')
        for img in imgpath(root):
            div2 = img.getparent()
            div1 = div2.getparent()
            if (len(div1), len(div2)) != (1, 1):
                continue
            cls = div1.get('class', '')
            first_rules = list(
                filter(None, [self.get_css_for_class(x) for x in cls.split()]))
            has_align = False
            for r in first_rules:
                if r.style.getProperty(u'text-align') is not None:
                    has_align = True
            ml = mr = None
            if not has_align:
                aval = None
                cls = div2.get(u'class', u'')
                rules = list(
                    filter(None,
                           [self.get_css_for_class(x) for x in cls.split()]))
                for r in rules:
                    # Only replace a margin when this rule declares it, so
                    # that a value found in an earlier class is kept when a
                    # later class does not mention that margin.
                    declared_ml = r.style.getPropertyCSSValue(u'margin-left')
                    if declared_ml:
                        ml = getattr(declared_ml, 'value', None)
                    declared_mr = r.style.getPropertyCSSValue(u'margin-right')
                    if declared_mr:
                        mr = getattr(declared_mr, 'value', None)
                if ml == mr == u'auto':
                    aval = u'center'
                elif ml == u'auto' and mr != u'auto':
                    aval = 'right'
                elif ml != u'auto' and mr == u'auto':
                    aval = 'left'
                if aval is not None:
                    style = div1.attrib.get('style', '').strip()
                    if style and not style.endswith(';'):
                        style = style + ';'
                    style += 'text-align:%s' % aval
                    has_align = True
                    div1.attrib['style'] = style

            if has_align:
                # This is needed for ADE, without it the text-align has no
                # effect. The inner div is not guaranteed to carry a style
                # attribute, so fall back to an empty one.
                style = div2.attrib.get('style', '')
                div2.attrib['style'] = 'display:inline;' + style
Exemplo n.º 59
0
    def finalize(self, all_blocks):
        '''
        Assign ids, names and sequence numbers to all styles used by
        ``all_blocks`` and record the results on ``self``.

        Sets ``pure_block_styles`` (styles of blocks without runs),
        ``combined_styles`` (one per block style / dominant run style pair),
        ``descendant_text_styles``, ``normal_style`` and
        ``primary_heading_style``. ``normal_pure_block_style`` is set only
        when at least one pure block style exists. ``all_blocks`` is
        iterated twice.
        '''
        block_counts, run_counts = Counter(), Counter()
        block_rmap, run_rmap = defaultdict(list), defaultdict(list)
        used_pairs = defaultdict(list)
        styles_by_heading = defaultdict(list)
        headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
        pure_block_styles = set()

        # Pass 1: count style usage and pair every block style with the run
        # style that carries the most weight inside that block.
        for block in all_blocks:
            block_style = block.style
            block_counts[block_style] += 1
            block_rmap[block.style].append(block)
            weights_in_block = Counter()
            for run in block.runs:
                weight = run.style_weight
                run_counts[run.style] += weight
                weights_in_block[run.style] += weight
                run_rmap[run.style].append(run)
            if not weights_in_block:
                pure_block_styles.add(block_style)
                continue
            dominant = weights_in_block.most_common(1)[0][0]
            used_pairs[(block_style, dominant)].append(block)
            if block.html_tag in headings:
                styles_by_heading[block.html_tag].append(
                    (block_style, dominant))

        self.pure_block_styles = sorted(pure_block_styles,
                                        key=block_counts.__getitem__)
        # Zero-pad the numbers to the width of the largest index
        bnum = len(unicode_type(max(1, len(pure_block_styles) - 1)))
        block_name_template = '%0{}d Block'.format(bnum)
        for i, bs in enumerate(self.pure_block_styles):
            bs.id = bs.name = block_name_template % i
            bs.seq = i
            if i == 0:
                self.normal_pure_block_style = bs

        # One combined style per (block style, run style) pair, counted by
        # the number of non-empty blocks using it.
        counts = Counter()
        smap = {}
        for pair, blocks in iteritems(used_pairs):
            bs, rs = pair
            combined = CombinedStyle(bs, rs, blocks, self.namespace)
            smap[pair] = combined
            counts[combined] += len([b for b in blocks if not b.is_empty()])

        # For each heading tag, in sorted order, the most used style that has
        # no outline level yet receives that tag's position as its level.
        for level, tag in enumerate(sorted(styles_by_heading)):
            by_usage = sorted((smap[pair] for pair in styles_by_heading[tag]),
                              key=counts.__getitem__)
            unassigned = [st for st in by_usage if st.outline_level is None]
            if unassigned:
                unassigned[-1].outline_level = level

        snum = len(unicode_type(max(1, len(counts) - 1)))
        para_name_template = 'Para %0{}d'.format(snum)
        heading_styles = []
        for i, (style, count) in enumerate(counts.most_common()):
            if i == 0:
                # The most used combined style becomes the Normal style
                self.normal_style = style
                label = 'Normal'
            elif style.outline_level is None:
                label = para_name_template % i
            else:
                label = 'Heading %d' % (style.outline_level + 1)
                heading_styles.append(style)
            style.id = style.name = label
            style.seq = i
        self.combined_styles = sorted(counts, key=attrgetter('seq'))
        for combined in self.combined_styles:
            combined.apply()

        # Pass 2: runs whose style differs from their parent style get a
        # shared descendant text style, de-duplicated through the map.
        descendant_style_map = {}
        ds_counts = Counter()
        for block in all_blocks:
            for run in block.runs:
                if run.parent_style is run.style or not run.parent_style or not run.style:
                    continue
                ds = DescendantTextStyle(run.parent_style, run.style)
                if not ds.properties:
                    continue
                run.descendant_style = descendant_style_map.setdefault(ds, ds)
                ds_counts[run.descendant_style] += run.style_weight
        rnum = len(unicode_type(max(1, len(ds_counts) - 1)))
        text_name_template = '%0{}d Text'.format(rnum)
        for i, (text_style, count) in enumerate(ds_counts.most_common()):
            text_style.id = 'Text%d' % i
            text_style.name = text_name_template % i
            text_style.seq = i
        self.descendant_text_styles = sorted(descendant_style_map,
                                             key=attrgetter('seq'))

        self.log.debug(
            '%d Text Styles %d Combined styles' %
            (len(self.descendant_text_styles), len(self.combined_styles)))

        self.primary_heading_style = None
        if heading_styles:
            # First style with the lowest outline level
            self.primary_heading_style = min(
                heading_styles, key=attrgetter('outline_level'))
        else:
            # No heading styles: use the first combined style with the
            # largest run font size above zero.
            largest = 0
            for candidate in self.combined_styles:
                if candidate.rs.font_size > largest:
                    self.primary_heading_style = candidate
                    largest = candidate.rs.font_size
Exemplo n.º 60
0
def read_metadata_kfx(stream, read_cover=True):
    '''
    Read the metadata.kfx file that is found in the sdr book folder for KFX
    files and return it as a Metadata object.

    Title and authors fall back to "Unknown". Unless the
    ``author_sort_copy_method`` tweak is ``copy``, authors written as
    "Last, First" are flipped to "First Last", and duplicate authors are
    dropped while keeping their order. The cover is only decoded when
    ``read_cover`` is true and is stored only if it can be identified.
    '''
    container = Container(stream.read())
    m = extract_metadata(container.decode())

    # dump_metadata(m)

    def has(x):
        # Truthy when the field exists and its first value is non-empty
        entries = m[x]
        return entries and entries[0]

    def get(x, single=True):
        # First value (or '') when single, otherwise all values; cleaned
        # with clean_xml_chars either way.
        entries = m[x]
        if not single:
            return [clean_xml_chars(y) for y in entries]
        return clean_xml_chars(entries[0]) if entries else ''

    title = get('title') or _('Unknown')
    authors = get('author', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] == 'copy':
            return x
        found = auth_pat.match(x.strip())
        if found is None:
            return x
        return found.group(2) + ' ' + found.group(1)

    unique_authors = []  # remove duplicates while retaining order
    for fixed in map(fix_author, authors):
        if fixed not in unique_authors:
            unique_authors.append(fixed)

    mi = Metadata(title, unique_authors)
    if has('author'):
        mi.author_sort = get('author')
    # ASIN takes precedence over content_id for the mobi-asin identifier
    for id_key in ('ASIN', 'content_id'):
        if has(id_key):
            mi.set_identifier('mobi-asin', get(id_key))
            break
    if has('languages'):
        langs = [
            lang for lang in map(canonicalize_lang, get('languages', False))
            if lang
        ]
        if langs:
            mi.languages = langs
    if has('issue_date'):
        # An unparseable date is ignored
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher
    if read_cover and m[COVER_KEY]:
        try:
            data = from_base64_bytes(m[COVER_KEY])
            fmt, w, h = identify(data)
        except Exception:
            # fmt of None makes the check below skip the cover
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi