def __init__(self, obj=None, url=None):
    """Create a new qfurl, either derived from @obj or taken from @url.

    When @obj is truthy, the URL is assembled by ``_urlunparse`` from a
    six-element tuple holding QFURL_SCHEME in the first slot, the object's
    ``qf_id`` in the third slot and the dotted ``module.TypeName`` of the
    object in the sixth slot; @url is ignored in that case.  Otherwise
    @url is stored unchanged (it may be None).

    Raises QfurlError if @obj has no ``qf_id`` attribute.
    """
    if not obj:
        self.url = url
        return

    obj_type = type(obj)
    type_name = "%s.%s" % (obj_type.__module__, obj_type.__name__)
    try:
        identifier = obj.qf_id
    except AttributeError:
        raise QfurlError("%s has no qfurl" % obj)
    components = (QFURL_SCHEME, "", identifier, "", "", type_name)
    self.url = _urlunparse(components)
def redacted_urlunparse(url, redact_with="***"):
    """``urlunparse()`` but redact password.

    The network location is rebuilt from the parsed components so that the
    password itself never appears in the result; everything else (scheme,
    path, params, query, fragment) is passed through unchanged.

    Args:
        url: Parsed URL as returned by ``urlparse()`` (needs ``username``,
            ``password``, ``hostname``, ``port`` and ``_replace``).
        redact_with: Placeholder written in place of the password.

    Returns:
        The reassembled URL string with any password replaced by
        ``redact_with``.  An explicit port is kept and IPv6 literals are
        re-wrapped in brackets.
    """
    netloc = []
    if url.username:
        netloc.append(url.username)
    if url.password:
        netloc.append(":")
        netloc.append(redact_with)
    if url.hostname:
        if netloc:
            netloc.append("@")
        hostname = url.hostname
        # ``hostname`` strips the brackets of IPv6 literals; without them the
        # rebuilt netloc would be ambiguous/invalid.
        if ":" in hostname:
            hostname = "[%s]" % hostname
        netloc.append(hostname)
        # ``port`` raises ValueError for malformed ports; this helper is used
        # for log messages, so degrade to "no port" instead of failing.
        try:
            port = url.port
        except ValueError:
            port = None
        if port is not None:
            netloc.append(":%d" % port)
    return _urlunparse(url._replace(netloc="".join(netloc)))
def escape_channel_url(channel):
    """Return *channel* with the components of its URL path percent-quoted.

    * ``file:`` channels that already contain ``%`` are treated as escaped
      and returned untouched; on Windows (``on_win``) backslashes in
      ``file:`` channels are first turned into forward slashes.
    * Values that parse without a scheme are returned unchanged.
    * On Windows a two-character netloc ending in ``:`` is treated as a
      drive letter and moved to the front of the path; otherwise the first
      two path components are left unquoted.
    """
    if channel.startswith("file:"):
        if "%" in channel:
            # it's escaped already
            return channel
        if on_win:
            channel = channel.replace("\\", "/")

    parsed = _urlparse(channel)
    if not parsed.scheme:
        return channel

    segments = parsed.path.split("/")
    if not on_win:
        escaped_path = "/".join(quote(segment) for segment in segments)
    elif len(parsed.netloc) == 2 and parsed.netloc[1] == ":":
        # with absolute paths (e.g. C:/something), C:, D:, etc might get
        # parsed as netloc
        drive = parsed.netloc
        escaped_path = "/".join([drive] + list(map(quote, segments)))
        parsed = parsed._replace(netloc="")
    else:
        untouched, to_quote = segments[:2], segments[2:]
        escaped_path = "/".join(untouched + list(map(quote, to_quote)))

    return _urlunparse(parsed._replace(path=escaped_path))
def urlunparse(parts):
    """Byte-oriented wrapper around ``_urlunparse``.

    Every element of *parts* is decoded with the ``charmap`` codec, the
    decoded components are joined by ``_urlunparse``, and the resulting
    URL is encoded back with ``charmap`` before being returned.
    """
    decoded = tuple(piece.decode("charmap") for piece in parts)
    return _urlunparse(decoded).encode("charmap")
def download_file(url, path=None, *more_components):
    """Download file (if necessary) and yield filename if necessary.

    This is a generator that yields exactly once on every path that does
    not raise.  The statements after each ``yield`` (and the exit of the
    surrounding ``with`` blocks) only run when the consumer resumes the
    generator.
    NOTE(review): presumably wrapped with ``contextlib.contextmanager``;
    the decorator is not visible in this part of the file -- confirm.

    Args:
        url: Parsed URL tuple; ``scheme``, ``path``, ``hostname``,
            ``username``, ``password`` and ``_replace`` are used.
        path: Optional path below ``url.path``.  When falsy, the last
            component of ``url.path`` is taken as the file and ``url`` is
            rewritten to point at its parent directory.
        *more_components: Further path components joined onto ``path``
            (only used when ``path`` is given).

    Yields:
        * ``file`` scheme: ``full_path`` if it exists on disk, else ``None``.
        * schemes in ``PYFS_SCHEMES``: ``None`` if the source file system
          has no ``path``, else the OS path of a copy inside a ``TempFS``.
        * ``s3``, ``http*``, ``irods*``: the OS path of a copy inside a
          ``TempFS``.

    Raises:
        ScelVisException: For an unsupported scheme, or when the iRODS
            collection has no data object with the requested name.
        Whatever ``r.raise_for_status()`` raises for a failed HTTP request.
    """
    if path:
        path = fs.path.join(path, *more_components)
        full_path = fs.path.join(url.path, path)
    else:
        # No explicit path: split url.path into directory (kept in ``url``)
        # and file name (kept in ``path``).
        full_path = url.path
        path = fs.path.basename(url.path)
        url = url._replace(path=fs.path.dirname(url.path))
    if url.scheme == "file":
        # Local files are used in place; nothing is copied.
        if os.path.exists(full_path):
            yield full_path
        else:
            yield None
    else:
        # Name of the temporary copy inside the TempFS.
        basename = fs.path.basename(full_path)
        if url.scheme in PYFS_SCHEMES:
            src_fs = make_fs(url)
            # Download file if it exists.
            if not src_fs.exists(path):
                yield None
            else:
                with TempFS() as tmpfs:
                    logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                    with open(tmpfs.getospath(basename), "wb") as tmpf:
                        src_fs.download(path, tmpf)
                    logger.info("Download complete.")
                    yield tmpfs.getospath(basename)
                    logger.info("Releasing %s" % tmpfs)
        elif url.scheme == "s3":
            logger.info("Connecting via S3...")
            # Anonymous access only when the URL carries no credentials at all.
            anon = url.username is None and url.password is None
            s3 = s3fs.S3FileSystem(anon=anon, key=url.username, secret=url.password)
            with TempFS() as tmpfs:
                logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                # NOTE(review): the key is built from hostname and ``path`` only;
                # ``url.path`` (the directory part) is not included -- confirm
                # this is intended for objects that are not at the bucket root.
                with s3.open("%s/%s" % (url.hostname, path), "rb") as inputf:
                    with open(tmpfs.getospath(basename), "wb") as outputf:
                        # Third argument of copyfileobj is the copy buffer length.
                        shutil.copyfileobj(inputf, outputf, settings.MAX_UPLOAD_DATA_SIZE)
                logger.info("Download complete.")
                yield tmpfs.getospath(basename)
                logger.info("Releasing %s" % tmpfs)
        elif url.scheme.startswith("http"):
            logger.info("Downloading via HTTP(S)...")
            with TempFS() as tmpfs:
                logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                # The request uses full_path because ``url`` may have been
                # rewritten above to hold only the directory part.
                # NOTE(review): no timeout is passed and the whole body is held
                # in memory via ``r.content`` before being written out.
                r = requests.get(_urlunparse(url._replace(path=full_path)), allow_redirects=True)
                r.raise_for_status()
                with open(tmpfs.getospath(basename), "wb") as outputf:
                    outputf.write(r.content)
                logger.info("Download complete.")
                yield tmpfs.getospath(basename)
                logger.info("Releasing %s" % tmpfs)
        elif url.scheme.startswith("irods"):
            with create_irods_session(url) as irods_session:
                logger.info("Downloading file...")
                with TempFS() as tmpfs:
                    logger.info("Downloading file %s from %s" % (path, redacted_urlunparse(url)))
                    path_tmp = tmpfs.getospath(basename)
                    collection = irods_session.collections.get(fs.path.dirname(full_path))
                    name = fs.path.basename(full_path)
                    # Linear search of the collection for the object by name;
                    # the ``else`` of the loop fires only if no ``break`` happened.
                    for data_object in collection.data_objects:
                        if data_object.name == name:
                            with data_object.open() as inputf:
                                with open(tmpfs.getospath(basename), "wb") as outputf:
                                    shutil.copyfileobj(
                                        inputf, outputf, settings.MAX_UPLOAD_DATA_SIZE
                                    )
                            break
                    else:
                        raise ScelVisException(
                            "Could not find %s in %s" % (full_path, redacted_urlunparse(url))
                        )
                    logger.info("Download complete.")
                    yield path_tmp
                    logger.info("Releasing %s" % tmpfs)
        else:
            raise ScelVisException("Invalid URL scheme: %s" % url.scheme)