Пример #1
0
    def href(self, basedir=None):
        '''
        Build the URL for this resource.

        A resource without a filesystem path (``self.path`` is None) returns
        its stored href unchanged. Otherwise the result is the percent-quoted
        path of the file relative to `basedir`, with ``/`` as the separator,
        followed by ``#fragment`` when a fragment is set. When the path is the
        basedir itself only the fragment part is returned.

        `basedir`: Directory the URL is made relative to. If None, the basedir
        of this resource is used (see :method:`set_basedir`); if that is unset
        too, the current working directory is used.
        '''
        if basedir is None:
            basedir = self._basedir if self._basedir else os.getcwdu()
        if self.path is None:
            return self._href
        fragment = self.fragment
        if isinstance(fragment, unicode_type):
            # quote() is fed UTF-8 bytes, never text
            fragment = fragment.encode('utf-8')
        suffix = ('#' + quote(fragment)) if self.fragment else ''
        if self.path == basedir:
            return '' + suffix
        try:
            relative = relpath(self.path, basedir)
        except OSError:
            # On windows path and basedir could be on different drives
            relative = self.path
        if isinstance(relative, unicode_type):
            relative = relative.encode('utf-8')
        return quote(relative.replace(os.sep, '/')) + suffix
Пример #2
0
    def href(self, basedir=None):
        '''
        Return a URL that points at this resource.

        Resources that are not files (``self.path`` is None) simply return the
        href they were created with. For files the URL is the quoted path
        relative to `basedir` (always using forward slashes) plus the quoted
        fragment, if there is one.

        `basedir`: If None, the basedir of this resource is used (see
        :method:`set_basedir`), falling back to the current working directory
        when the resource has none.
        '''
        if basedir is None:
            basedir = self._basedir or os.getcwdu()
        if self.path is None:
            return self._href

        # Fragment part, including the leading '#', or the empty string
        if self.fragment:
            raw_fragment = self.fragment
            if isinstance(raw_fragment, unicode_type):
                raw_fragment = raw_fragment.encode('utf-8')
            frag = '#' + quote(raw_fragment)
        else:
            frag = ''

        if self.path == basedir:
            return frag

        try:
            rpath = relpath(self.path, basedir)
        except OSError:  # On windows path and basedir could be on different drives
            rpath = self.path
        encoded = rpath.encode('utf-8') if isinstance(rpath, unicode_type) else rpath
        return quote(encoded.replace(os.sep, '/')) + frag
Пример #3
0
 def link_replacer(base, url):
     '''
     Rewrite a single link found in the file named `base`.

     Links that are empty anchors, have a network location or query, use a
     scheme other than ``file``, or have an empty or absolute path are
     returned untouched. Everything else is resolved to a container name and
     replaced by either a ``resource_template`` URL or a ``missing:`` URL;
     `base` is recorded in ``changed`` whenever a replacement is made.
     '''
     if url.startswith('#'):
         anchor = urlunquote(url[1:])
         if anchor:
             changed.add(base)
             return resource_template.format(encode_url(base, anchor))
         return url

     parsed = urlparse(url)
     if ((parsed.netloc or parsed.query)
             or (parsed.scheme and parsed.scheme != 'file')
             or (not parsed.path or parsed.path.startswith('/'))):
         return url

     path, fragment = parsed.path, parsed.fragment
     name = container.href_to_name(path, base)
     if not name:
         # Unresolvable links come back as the bare path
         return path

     if container.has_name_and_is_not_empty(name):
         result = resource_template.format(encode_url(name, urlunquote(fragment)))
     else:
         raw_name = name.encode('utf-8') if isinstance(name, unicode_type) else name
         result = 'missing:' + force_unicode(quote(raw_name), 'utf-8')
     changed.add(base)
     return result
Пример #4
0
 def link_replacer(base, url):
     # Maps one link from the file `base` to its replacement URL. Links that
     # are left alone are returned as-is; every rewritten link marks `base`
     # as changed.
     if url.startswith('#'):
         # Pure in-file anchor
         target_frag = urlunquote(url[1:])
         if not target_frag:
             return url
         changed.add(base)
         return resource_template.format(encode_url(base, target_frag))

     parts = urlparse(url)
     # Anything with a host or a query string is not a link into the book
     if parts.netloc or parts.query:
         return url
     # Only scheme-less and file: links are candidates
     if parts.scheme and parts.scheme != 'file':
         return url
     rel_path = parts.path
     if not rel_path or rel_path.startswith('/'):
         return url

     name = self.href_to_name(rel_path, base)
     if not name:
         return rel_path

     if not self.has_name_and_is_not_empty(name):
         if isinstance(name, unicode_type):
             name = name.encode('utf-8')
         new_url = 'missing:' + force_unicode(quote(name), 'utf-8')
     else:
         new_url = resource_template.format(
             encode_url(name, urlunquote(parts.fragment)))
     changed.add(base)
     return new_url
Пример #5
0
def qquote(val, use_plus=True):
    '''
    Percent-quote `val` and return the result as text.

    `val`: bytes, or text which is UTF-8 encoded before quoting.
    `use_plus`: If True quote with ``quote_plus``, otherwise with ``quote``.
    '''
    raw = val if isinstance(val, bytes) else val.encode('utf-8')
    quoter = quote_plus if use_plus else quote
    quoted = quoter(raw)
    return quoted.decode('utf-8') if isinstance(quoted, bytes) else quoted
Пример #6
0
    def resource_adder(self, link_, base=None):
        '''
        Add the local file that `link_` points to to the OEB manifest and
        return the href that should replace the link.

        `link_`: The link as found in the document.
        `base`: Optional directory against which a relative local path is
        resolved.

        Returns `link_` unchanged when it does not resolve to a readable,
        non-directory local file, None when it points to a plain text file and
        otherwise the manifest href of the added resource, with the fragment
        of the original link (if any) re-attached. Each file is added once;
        later links to the same file reuse the href stored in
        ``self.added_resources``.
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            # A path that cannot be normalised is left alone. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            # Fold case so differently-cased links to one file share a key
            link = link.lower()
        if link not in self.added_resources:
            # Only this branch needs quote(), so import it here
            from polyglot.urllib import quote
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(
                id='added', href=sanitize_file_name(bhref))
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r' % link_)
                return None
            if media_type == self.BINARY_MIME:
                # Check for the common case, images
                try:
                    img = what(link)
                except EnvironmentError:
                    pass
                else:
                    if img:
                        media_type = self.guess_type(
                            'dummy.' + img)[0] or self.BINARY_MIME

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                                                   self.oeb.log,
                                                   ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode_type):
                bhref = bhref.encode('utf-8')
            item.html_input_href = as_unicode(quote(bhref))
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(self.css_import_handler,
                                                  os.path.dirname(link))
            # Accessed only for its side effect (presumably forces the item's
            # data to be read now, while the container above is still set)
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink
Пример #7
0
def canonicalize_url(url):
    '''
    Return `url` with its path, params, query and fragment percent-quoted
    when the URL contains whitespace; URLs without whitespace are returned
    unchanged.
    '''
    # mechanize does not handle quoting automatically
    if re.search(r'\s+', url) is None:
        return url
    parts = list(urlparse(url))
    # Components 2..5 are everything after scheme and netloc
    parts[2:6] = [quote(component) for component in parts[2:6]]
    return urlunparse(parts)
Пример #8
0
    def fetch_url(self, url):
        '''
        Fetch `url` and return its contents wrapped in a ``response`` object
        whose ``newurl`` attribute is set to the URL that was actually read.

        ``file:`` URLs are read straight from disk. Any other URL is opened
        through ``self.browser`` (preferring ``open_novisit`` when the browser
        has it) after sleeping as needed so that at least ``self.delay``
        seconds separate it from the previous fetch. ``self.last_fetch_at`` is
        updated after every network attempt, successful or not.

        Raises FetchError when the URLError carries a ``code`` present in
        ``responses``. Connection-reset and name-resolution errors are retried
        once after one second; any other URLError is re-raised.
        '''
        data = None
        self.log.debug('Fetching', url)
        st = time.time()

        # Check for a URL pointing to the local filesystem and special case it
        # for efficiency and robustness. Bypasses delay checking as it does not
        # apply to local fetches. Ensures that unicode paths that are not
        # representable in the filesystem_encoding work.
        # is_local holds the length of the prefix to strip (0 = not local)
        is_local = 0
        if url.startswith('file://'):
            is_local = 7
        elif url.startswith('file:'):
            is_local = 5
        if is_local > 0:
            url = url[is_local:]
            if iswindows and url.startswith('/'):
                # Drop the slash in front of the remaining path on windows
                url = url[1:]
            with open(url, 'rb') as f:
                data = response(f.read())
                data.newurl = 'file:'+url  # This is what mechanize does for
                # local URLs
            self.log.debug('Fetched %s in %.1f seconds' % (url, time.time() - st))
            return data

        # Rate limiting: wait out the remainder of self.delay
        delta = time.time() - self.last_fetch_at
        if delta < self.delay:
            time.sleep(self.delay - delta)
        # mechanize does not handle quoting automatically
        if re.search(r'\s+', url) is not None:
            if isinstance(url, unicode_type):
                url = url.encode('utf-8')
            purl = list(urlparse(url))
            # Quote path, params, query and fragment (indices 2..5)
            # NOTE(review): on Python 3 quote() returns text while the other
            # parsed components are bytes here -- confirm urlunparse() accepts
            # the mix
            for i in range(2, 6):
                purl[i] = quote(purl[i])
            url = urlunparse(purl).decode('utf-8')
        open_func = getattr(self.browser, 'open_novisit', self.browser.open)
        try:
            with closing(open_func(url, timeout=self.timeout)) as f:
                # NOTE(review): read() is called twice and the results are
                # concatenated -- presumably to pick up data left over after a
                # short first read; confirm
                data = response(f.read()+f.read())
                data.newurl = f.geturl()
        except URLError as err:
            if hasattr(err, 'code') and err.code in responses:
                raise FetchError(responses[err.code])
            if getattr(err, 'reason', [0])[0] == 104 or \
                getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2,
                        -3):  # Connection reset by peer or Name or service not known
                self.log.debug('Temporary error, retrying in 1 second')
                time.sleep(1)
                # Single retry; a failure here propagates to the caller
                with closing(open_func(url, timeout=self.timeout)) as f:
                    data = response(f.read()+f.read())
                    data.newurl = f.geturl()
            else:
                raise err
        finally:
            self.last_fetch_at = time.time()
        self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
        return data
Пример #9
0
    def resource_adder(self, link_, base=None):
        '''
        Resolve `link_` to a file on disk, register that file in the OEB
        manifest (once per file) and return the manifest href to use instead
        of the link.

        `link_`: Link text taken from the input document.
        `base`: Directory used to make a relative local path absolute.

        Return value:
          * `link_` itself if the link is not a local path, cannot be
            normalised, is not readable or is a directory
          * None if the file is guessed to be ``text/plain``
          * the manifest href, followed by ``#`` and the link's fragment when
            it has one, in every other case
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            # Was a bare except, which also trapped KeyboardInterrupt and
            # SystemExit; path errors are still ignored on purpose.
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            # quote() is only needed when a new resource is registered
            from polyglot.urllib import quote
            bhref = os.path.basename(link)
            new_id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None
            if media_type == self.BINARY_MIME:
                # Check for the common case, images
                try:
                    img = what(link)
                except EnvironmentError:
                    pass
                else:
                    if img:
                        media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(new_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode_type):
                bhref = bhref.encode('utf-8')
            item.html_input_href = unicode_type(quote(bhref))
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            # Bare attribute access kept for its side effect (presumably
            # triggers the actual read of the file)
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink
Пример #10
0
def canonicalize_url(url):
    '''
    Percent-quote the path, params, query and fragment of `url` when the URL
    contains whitespace and return the result as text. URLs without
    whitespace are returned unchanged.
    '''
    # mechanize does not handle quoting automatically
    if re.search(r'\s+', url) is None:
        return url
    raw = url.encode('utf-8') if isinstance(url, unicode_type) else url
    purl = list(urlparse(raw))
    # Everything is handled as bytes until the final decode
    for idx, component in enumerate(purl[2:6], 2):
        purl[idx] = as_bytes(quote(component))
    return urlunparse(purl).decode('utf-8')
Пример #11
0
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Build the download file name for a book: truncated title and authors,
    the book id and the lower-cased format as extension.

    ``kepub`` becomes ``kepub.epub`` when the request's User-Agent header
    contains ``Kobo Touch``. With `as_encoded_unicode` the sanitized name is
    UTF-8 encoded and percent-quoted, otherwise it is reduced to an ASCII
    file name with double quotes replaced by underscores.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        ext = 'kepub.epub'
    fname = '%s - %s_%s.%s' % (title[:30], authors[:30], book_id, ext)
    if not as_encoded_unicode:
        return ascii_filename(fname).replace('"', '_')
    # See https://tools.ietf.org/html/rfc6266
    encoded = sanitize_file_name_unicode(fname).encode('utf-8')
    return quote(encoded).decode('ascii')
Пример #12
0
def icon_map():
    '''
    Return the shared category -> icon mapping, building it on first use.

    The map starts as a copy of ``category_icon_map``, gains an entry for
    every custom icon from the ``tags_browser_category_icons`` preference
    whose file is readable, and stores the per-extension mimetype icons under
    the ``file_type_icons`` key. Construction happens while holding
    ``_icon_map_lock``.
    '''
    global _icon_map
    with _icon_map_lock:
        if _icon_map is not None:
            return _icon_map
        from calibre.gui2 import gprefs
        _icon_map = category_icon_map.copy()
        for category, icon_name in iteritems(gprefs.get('tags_browser_category_icons', {})):
            icon_path = os.path.join(config_dir, 'tb_icons', icon_name)
            if os.access(icon_path, os.R_OK):
                _icon_map[category] = '_' + quote(icon_name)
        file_type_icons = {}
        for ext, icon in iteritems(EXT_MAP):
            file_type_icons[ext] = 'mimetypes/%s.png' % icon
        _icon_map['file_type_icons'] = file_type_icons
        return _icon_map
Пример #13
0
def icon_map():
    '''
    Lazily create and return the module-wide icon map.

    On the first call (under ``_icon_map_lock``) the map is seeded from
    ``category_icon_map``, overlaid with the readable user icons listed in
    the ``tags_browser_category_icons`` preference and completed with a
    ``file_type_icons`` dictionary derived from ``EXT_MAP``. Later calls
    return the same object.
    '''
    global _icon_map
    with _icon_map_lock:
        if _icon_map is None:
            from calibre.gui2 import gprefs
            _icon_map = category_icon_map.copy()
            overrides = gprefs.get('tags_browser_category_icons', {})
            readable = (
                (key, fname) for key, fname in iteritems(overrides)
                if os.access(os.path.join(config_dir, 'tb_icons', fname), os.R_OK)
            )
            for key, fname in readable:
                # Custom icons are marked with a leading underscore
                _icon_map[key] = '_' + quote(fname)
            _icon_map['file_type_icons'] = dict(
                (ext, 'mimetypes/%s.png' % icon) for ext, icon in iteritems(EXT_MAP)
            )
        return _icon_map
Пример #14
0
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Return the file name offered when a book is downloaded.

    The name is ``<title> - <authors>_<book_id>.<ext>`` with title and
    authors cut to 30 characters each. A ``kepub`` request from a client whose
    User-Agent contains ``Kobo Touch`` gets the ``kepub.epub`` extension.

    `as_encoded_unicode`: If True return the sanitized name percent-quoted
    from its UTF-8 encoding, otherwise return an ASCII-only name without
    double quotes.
    '''
    names = authors_to_string(mi.authors or [_('Unknown')])
    book_title = mi.title or _('Unknown')
    extension = (fmt or '').lower()
    if extension == 'kepub':
        if 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
            extension = 'kepub.epub'
    fields = (book_title[:30], names[:30], book_id, extension)
    fname = '%s - %s_%s.%s' % fields
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        utf8_name = sanitize_file_name(fname).encode('utf-8')
        return unicode_type(quote(utf8_name))
    return ascii_filename(fname).replace('"', '_')
Пример #15
0
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Compute the file name used for a book download.

    The base name combines the first 30 characters of the title and of the
    author string with the book id and the lower-cased format. It is then
    either sanitized and percent-quoted from UTF-8 (`as_encoded_unicode`) or
    converted to an ASCII file name with double quotes replaced. For
    ``kepub`` requested by a ``Kobo Touch`` User-Agent, ``!`` is replaced by
    ``_`` and ``.epub`` is appended to the finished name.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    fname = '{} - {}_{}.{}'.format(title[:30], authors[:30], book_id, ext)
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        fname = str(quote(sanitize_file_name(fname).encode('utf-8')))
    else:
        fname = ascii_filename(fname).replace('"', '_')
    for_kobo = ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', '')
    if for_kobo:
        return fname.replace('!', '_') + '.epub'
    return fname
Пример #16
0
 def a(filename, data=None, status=OK, method='POST', username='******',
       add_duplicates='n', job_id=1):
     # Send one add-book request for `filename`, compare the response status
     # with `status` via ae() and hand back the response payload.
     quoted_name = quote(filename.encode('utf-8'))
     path = '/cdb/add-book/{}/{}/{}'.format(job_id, add_duplicates, quoted_name)
     request_options = dict(username=username, password='******', prefix='',
                            method=method, data=data)
     r, data = make_request(conn, path, **request_options)
     ae(status, r.status)
     return data
Пример #17
0
def uniquote(raw):
    '''Percent-quote `raw` (text is UTF-8 encoded first) and return a str.'''
    data = raw.encode('utf-8') if isinstance(raw, str) else raw
    return str(quote(data))
Пример #18
0
def urls_from_identifiers(identifiers):  # {{{
    '''
    Build the list of links for a book's identifiers.

    `identifiers`: Mapping of identifier type to value. Types are
    lower-cased before use.

    Returns a list of ``(name, id_type, id_value, url)`` tuples gathered, in
    this order, from:

      1. the templates in the ``id_link_rules`` preference
      2. ``get_book_urls()`` of every metadata plugin
      3. the built-in isbn, doi, arxiv, oclc and issn links
      4. identifiers whose type matches ``uri``/``url`` plus optional digits
      5. any identifier not linked so far whose value is itself an http,
         https or file URL

    A template that fails to format is reported on stderr and skipped;
    errors raised by a plugin are ignored.
    '''
    identifiers = {k.lower(): v for k, v in iteritems(identifiers)}
    ans = []
    keys_left = set(identifiers)
    url_schemes = {'http', 'https', 'file'}

    def add(name, k, val, url):
        ans.append((name, k, val, url))
        keys_left.discard(k)

    def add_if_url(k, raw):
        # Undo the | escaping: within the first 8 characters | stands for
        # ':', after that for ','. Only http, https and file URLs are linked.
        url = raw[:8].replace('|', ':') + raw[8:].replace('|', ',')
        if url.partition(':')[0].lower() in url_schemes:
            parts = urlparse(url)
            add(parts.netloc or parts.path, k, url, url)

    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in iteritems(identifiers):
            val = val.replace('|', ',')
            quoted = quote(val if isinstance(val, bytes) else val.encode('utf-8'))
            vals = {'id': unicode_type(quoted)}
            for name, template in rules.get(k) or ():
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    # format_exc() only builds a string; print the traceback
                    # so that a broken template is actually reported.
                    import traceback
                    traceback.print_exc()
                    continue
                add(name, k, val, url)
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                add(plugin.get_book_url_name(id_type, id_val, url), id_type,
                    id_val, url)
        except Exception:
            # Best effort: a failing plugin must not prevent the remaining
            # links from being built.
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        add(isbn, 'isbn', isbn, 'https://www.worldcat.org/isbn/' + isbn)
    doi = identifiers.get('doi', None)
    if doi:
        add('DOI', 'doi', doi, 'https://dx.doi.org/' + doi)
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        add('arXiv', 'arxiv', arxiv, 'https://arxiv.org/abs/' + arxiv)
    oclc = identifiers.get('oclc', None)
    if oclc:
        add('OCLC', 'oclc', oclc, 'https://www.worldcat.org/oclc/' + oclc)
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        add(issn, 'issn', issn, 'https://www.worldcat.org/issn/' + issn)
    for k, url in iteritems(identifiers):
        if url and re.match(r'ur[il]\d*$', k) is not None:
            add_if_url(k, url)
    # Iterate over a snapshot: add() removes entries from keys_left
    for k in tuple(keys_left):
        val = identifiers.get(k)
        if val:
            add_if_url(k, val)
    return ans
Пример #19
0
def uniquote(raw):
    '''
    Percent-quote `raw` and return the result as text. Text input is UTF-8
    encoded before quoting; anything else is quoted as given.
    '''
    data = raw.encode('utf-8') if isinstance(raw, unicode_type) else raw
    return unicode_type(quote(data))
Пример #20
0
def uniquote(raw):
    # Returns the percent-quoted form of `raw` as text; text input is
    # converted to UTF-8 bytes first.
    if not isinstance(raw, unicode_type):
        return unicode_type(quote(raw))
    return unicode_type(quote(raw.encode('utf-8')))
Пример #21
0
 def a(filename, data=None, status=OK, method='POST', username='******', add_duplicates='n', job_id=1):
     # Issue an add-book request for `filename`, check the response status
     # against `status` with ae() and return the data of the response.
     encoded_name = quote(filename.encode('utf-8'))
     endpoint = '/cdb/add-book/{}/{}/{}'.format(job_id, add_duplicates, encoded_name)
     response, payload = make_request(
         conn, endpoint,
         username=username, password='******', prefix='', method=method, data=data)
     ae(status, response.status)
     return payload