示例#1
0
文件: qfurl.py 项目: engla/kupfer
 def __init__(self, obj=None, url=None):
     """Set ``self.url`` from @obj when one is given, otherwise from @url.

     For a truthy @obj the URL is assembled by ``_urlunparse`` from a
     6-tuple holding QFURL_SCHEME (first slot), the object's ``qf_id``
     (third slot) and the object's dotted ``module.ClassName`` (last slot).

     Raises QfurlError if @obj has no ``qf_id`` attribute.
     """
     if not obj:
         # No (or a falsy) object: keep the supplied url unchanged.
         self.url = url
         return
     try:
         qfid = obj.qf_id
     except AttributeError:
         raise QfurlError("%s has no qfurl" % obj)
     obj_type = type(obj)
     typname = "%s.%s" % (obj_type.__module__, obj_type.__name__)
     self.url = _urlunparse((QFURL_SCHEME, "", qfid, "", "", typname))
示例#2
0
文件: qfurl.py 项目: guns/kupfer
 def __init__(self, obj=None, url=None):
     """Create a qfurl for object @obj, or wrap an existing @url.

     When @obj is falsy, @url is stored as-is in ``self.url``.  Otherwise
     ``self.url`` is produced by ``_urlunparse`` from QFURL_SCHEME, the
     value of ``obj.qf_id`` and the "module.TypeName" string of @obj.

     Raises QfurlError when @obj does not provide ``qf_id``.
     """
     if not obj:
         self.url = url
         return
     try:
         identifier = obj.qf_id
     except AttributeError:
         raise QfurlError("%s has no qfurl" % obj)
     klass = type(obj)
     dotted_name = "%s.%s" % (klass.__module__, klass.__name__)
     url_fields = (QFURL_SCHEME, "", identifier, "", "", dotted_name)
     self.url = _urlunparse(url_fields)
示例#3
0
def redacted_urlunparse(url, redact_with="***"):
    """``urlunparse()`` but redact password.

    The network location is rebuilt from the parsed components so that the
    password is replaced by ``redact_with``.  The port is kept and IPv6
    literals are re-bracketed, so the redacted URL still identifies the same
    endpoint as the input.

    Args:
        url: A parsed URL exposing ``username``, ``password``, ``hostname``,
            ``port`` and ``_replace`` (e.g. the result of
            ``urllib.parse.urlparse``).
        redact_with: Text substituted for the password.

    Returns:
        The URL re-assembled by ``_urlunparse`` with the password redacted.
    """
    netloc = []
    if url.username:
        netloc.append(url.username)
    if url.password:
        netloc.append(":")
        netloc.append(redact_with)
    if url.hostname:
        if netloc:
            netloc.append("@")
        host = url.hostname
        if ":" in host:
            # ``hostname`` strips the brackets of an IPv6 literal; restore
            # them so the host cannot be confused with a host:port pair.
            host = "[%s]" % host
        netloc.append(host)
        try:
            port = url.port
        except ValueError:
            # Malformed port in the input: this helper only formats URLs for
            # display, so omit the port instead of failing.
            port = None
        if port is not None:
            netloc.append(":%d" % port)
    url = url._replace(netloc="".join(netloc))
    return _urlunparse(url)
示例#4
0
def escape_channel_url(channel):
    """Return ``channel`` with the path segments of its URL percent-quoted.

    * A ``file:`` channel that already contains ``%`` is returned unchanged
      (it is treated as already escaped).
    * A channel in which ``_urlparse`` finds no scheme is returned unchanged.
    * When ``on_win`` is set, backslashes in ``file:`` channels are replaced
      by forward slashes before parsing.  A two-character netloc ending in
      ``:`` (a drive such as ``C:``) is moved to the front of the path
      unquoted; otherwise the first two ``/``-split path segments are left
      unquoted.
    """
    is_file_url = channel.startswith("file:")
    if is_file_url and "%" in channel:
        return channel  # it's escaped already
    if is_file_url and on_win:
        channel = channel.replace("\\", "/")

    parsed = _urlparse(channel)
    if not parsed.scheme:
        return channel

    segments = parsed.path.split("/")
    if not on_win:
        escaped_path = "/".join(quote(seg) for seg in segments)
    elif len(parsed.netloc) == 2 and parsed.netloc.endswith(":"):
        # With absolute paths (e.g. C:/something), C:, D:, etc. might get
        # parsed as the netloc; put the drive back in front of the path.
        escaped_path = "/".join([parsed.netloc] + [quote(seg) for seg in segments])
        parsed = parsed._replace(netloc="")
    else:
        escaped_path = "/".join(segments[:2] + [quote(seg) for seg in segments[2:]])
    return _urlunparse(parsed._replace(path=escaped_path))
示例#5
0
文件: client.py 项目: 0xfab/scrapy
 def urlunparse(parts):
     """Byte-string wrapper around ``_urlunparse``.

     Every item of @parts is decoded with the "charmap" codec, the decoded
     tuple is passed to ``_urlunparse``, and the resulting text is encoded
     back with "charmap" before being returned.
     """
     decoded_parts = tuple(part.decode("charmap") for part in parts)
     return _urlunparse(decoded_parts).encode("charmap")
示例#6
0
 def urlunparse(parts):
     """Assemble a URL from encoded components and return it encoded.

     The components in @parts are decoded via the "charmap" codec so they
     can be joined by ``_urlunparse``; the joined URL is then encoded with
     the same codec.
     """
     text_parts = []
     for component in parts:
         text_parts.append(component.decode("charmap"))
     joined = _urlunparse(tuple(text_parts))
     return joined.encode("charmap")
示例#7
0
文件: data.py 项目: pythseq/scelvis
def download_file(url, path=None, *more_components):
    """Locate or download the file addressed by ``url`` and yield a local path.

    This is a generator that yields at most once.  Code after each ``yield``
    only runs when the generator is resumed, so the ``TempFS`` holding a
    downloaded copy is exited only then.
    NOTE(review): presumably this function is wrapped with
    ``contextlib.contextmanager`` by a decorator outside this block -- confirm.

    Args:
        url: Parsed URL; the code reads ``scheme``, ``path``, ``username``,
            ``password`` and ``hostname`` and calls ``_replace`` on it
            (presumably a ``urllib.parse`` result -- confirm against callers).
        path: Optional path that is joined onto ``url.path``.  When falsy,
            the base name of ``url.path`` is used as the file name and ``url``
            is rewritten to point at the containing directory.
        *more_components: Further components joined onto ``path``; they are
            ignored when ``path`` is falsy.

    Yields:
        * ``file`` scheme: the full path if it exists locally, else ``None``.
        * Schemes in ``PYFS_SCHEMES``: ``None`` if the file does not exist in
          the source file system, else the OS path of a temporary copy.
        * ``s3``, ``http*`` and ``irods*`` schemes: the OS path of a temporary
          copy.

    Raises:
        ScelVisException: If the URL scheme is not handled, or if the
            requested data object is not found in the iRODS collection.
        Whatever ``r.raise_for_status()`` raises for an unsuccessful HTTP
        response.
    """
    if path:
        path = fs.path.join(path, *more_components)
        full_path = fs.path.join(url.path, path)
    else:
        # No explicit path: split url.path into directory (kept in ``url``)
        # and file name (kept in ``path``); ``full_path`` is the original path.
        full_path = url.path
        path = fs.path.basename(url.path)
        url = url._replace(path=fs.path.dirname(url.path))
    if url.scheme == "file":
        # Local files are not copied; only their existence is checked.
        if os.path.exists(full_path):
            yield full_path
        else:
            yield None
    else:
        # Name used for the temporary local copy in all remote branches.
        basename = fs.path.basename(full_path)
        if url.scheme in PYFS_SCHEMES:
            src_fs = make_fs(url)
            # Download the file only if it exists in the source file system.
            if not src_fs.exists(path):
                yield None
            else:
                with TempFS() as tmpfs:
                    logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                    with open(tmpfs.getospath(basename), "wb") as tmpf:
                        src_fs.download(path, tmpf)
                    logger.info("Download complete.")
                    yield tmpfs.getospath(basename)
                    # Runs once the consumer resumes the generator.
                    logger.info("Releasing %s" % tmpfs)
        elif url.scheme == "s3":
            logger.info("Connecting via S3...")
            # Anonymous access only when the URL carries no credentials at all.
            anon = url.username is None and url.password is None
            s3 = s3fs.S3FileSystem(anon=anon, key=url.username, secret=url.password)
            with TempFS() as tmpfs:
                logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                # Object is addressed as "<hostname>/<path>"; presumably the
                # hostname is the bucket name -- confirm.
                with s3.open("%s/%s" % (url.hostname, path), "rb") as inputf:
                    with open(tmpfs.getospath(basename), "wb") as outputf:
                        # NOTE(review): the third argument of
                        # shutil.copyfileobj is the copy buffer length, not a
                        # limit on the total number of bytes copied.
                        shutil.copyfileobj(inputf, outputf, settings.MAX_UPLOAD_DATA_SIZE)
                logger.info("Download complete.")
                yield tmpfs.getospath(basename)
                # Runs once the consumer resumes the generator.
                logger.info("Releasing %s" % tmpfs)
        elif url.scheme.startswith("http"):
            logger.info("Downloading via HTTP(S)...")
            with TempFS() as tmpfs:
                logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                # Request the URL with its path replaced by the full file path.
                r = requests.get(_urlunparse(url._replace(path=full_path)), allow_redirects=True)
                r.raise_for_status()
                with open(tmpfs.getospath(basename), "wb") as outputf:
                    # The whole response body is written in one call.
                    outputf.write(r.content)
                logger.info("Download complete.")
                yield tmpfs.getospath(basename)
                # Runs once the consumer resumes the generator.
                logger.info("Releasing %s" % tmpfs)
        elif url.scheme.startswith("irods"):
            with create_irods_session(url) as irods_session:
                logger.info("Downloading file...")
                with TempFS() as tmpfs:
                    logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                    path_tmp = tmpfs.getospath(basename)
                    # Fetch the collection for the directory part of full_path
                    # and look for a data object named like the file.
                    collection = irods_session.collections.get(fs.path.dirname(full_path))
                    name = fs.path.basename(full_path)
                    for data_object in collection.data_objects:
                        if data_object.name == name:
                            with data_object.open() as inputf:
                                with open(tmpfs.getospath(basename), "wb") as outputf:
                                    # NOTE(review): third argument is the copy
                                    # buffer length, not a total size limit.
                                    shutil.copyfileobj(
                                        inputf, outputf, settings.MAX_UPLOAD_DATA_SIZE
                                    )
                                    # Stop at the first match; skips the
                                    # ``else`` clause of the loop below.
                                    break
                    else:
                        # Loop finished without ``break``: no object matched.
                        raise ScelVisException(
                            "Could not find %s in %s" % (full_path, redacted_urlunparse(url))
                        )
                    logger.info("Download complete.")
                    yield path_tmp
                    # Runs once the consumer resumes the generator.
                    logger.info("Releasing %s" % tmpfs)
        else:
            raise ScelVisException("Invalid URL scheme: %s" % url.scheme)