def write_workflow_attachment(run_id: str, run_request: RunRequest,
                              files: Dict[str, FileStorage]) -> None:
    """
    Write the workflow attachments of a run under its `exe_dir`.

    Two sources are handled:

    - Entries of `run_request["workflow_attachment"]` that contain both
      `file_name` and `file_url`, whose URL scheme is http or https and
      whose URL does not start with this service's own endpoint, are
      downloaded.
    - If the `WORKFLOW_ATTACHMENT` config value is truthy, the files
      uploaded in the `workflow_attachment[]` form field are saved.

    In both cases the file name is passed through `secure_filepath` before
    it is joined to `exe_dir`, and missing parent directories are created.

    Raises `requests.HTTPError` (via `raise_for_status`) when a remote
    attachment answers with a 4xx/5xx status.
    """
    exe_dir: Path = get_path(run_id, "exe_dir")
    host = request.host_url.strip("/")
    url_prefix = current_app.config['URL_PREFIX'].strip("/")
    endpoint = f"{host}/{url_prefix}".strip("/")

    for attachment in run_request["workflow_attachment"]:
        if "file_name" not in attachment or "file_url" not in attachment:
            continue
        remote_name: str = attachment["file_name"]
        remote_url: str = attachment["file_url"]
        if parse.urlparse(remote_url).scheme not in ["http", "https"]:
            continue
        # URLs under our own endpoint refer to files that were uploaded
        # with this request; they are written by the block below.
        if remote_url.startswith(endpoint):
            continue

        response: Response = requests.get(remote_url)
        # Fail loudly instead of storing an HTTP error page as if it were
        # the attachment's content.
        response.raise_for_status()

        download_path = \
            exe_dir.joinpath(secure_filepath(remote_name)).resolve()
        download_path.parent.mkdir(parents=True, exist_ok=True)
        with download_path.open(mode="wb") as f:
            f.write(response.content)

    if current_app.config["WORKFLOW_ATTACHMENT"]:
        uploaded_files = \
            files.getlist("workflow_attachment[]")  # type: ignore
        for uploaded in uploaded_files:
            upload_name = secure_filepath(uploaded.filename)  # type: ignore
            upload_path = exe_dir.joinpath(upload_name).resolve()
            upload_path.parent.mkdir(parents=True, exist_ok=True)
            uploaded.save(upload_path)  # type: ignore
def get_runs_id_data(run_id: str, subpath: str = "") -> Response:
    """
    This provides a remote url to download a file or directory
    under the `run_dir` of the sapporo-service.

    - In the case of `path/to/file`, this returns the file.
    - In the case of `path/to/dir`, this returns the list of files
      under directory in JSON format.
    - In the case of `path/to/dir?download=true`, this returns the
      directory in zip format.

    The path is relative to the base directory of each run.
    See `README.md` in sapporo-service for the structure of `run_dir`.
    For example, if you want to download the output `foo.txt`,
    specify something like `outputs/foo.txt`.

    `..` will be ignored. An empty `subpath` refers to the run directory
    itself. A request for a path that does not exist is aborted with
    status 400.
    """
    validate_run_id(run_id)

    name = Path(subpath).name
    # `secure_filepath` drops leading dots (".foo" becomes "foo"), so a
    # hidden final component is re-attached as-is after sanitising only
    # its parent. `..` must never take this route: it would be appended
    # unsanitised and escape the run directory. `startswith` also copes
    # with an empty name (the default `subpath=""`), where indexing `[0]`
    # would raise IndexError.
    if name.startswith(".") and name != "..":
        requested_path = \
            secure_filepath(str(Path(subpath).parent)).joinpath(name)
    else:
        requested_path = secure_filepath(subpath)

    path = get_run_dir(run_id).joinpath(requested_path)
    if not path.exists():
        parent = Path(f"runs/{run_id}/data").joinpath(requested_path.parent)
        abort(400,
              f"The specified path: {requested_path} does not exist. "
              f"Please make another request to `<endpoint>/{parent}/` again "
              "and check the dir structure.")

    if path.is_file():
        return send_file(path, as_attachment=True)

    if str2bool(request.args.get("download", False)):
        # The temporary file only supplies a unique base name;
        # `make_archive` writes the archive to `<base name>.zip`.
        with NamedTemporaryFile() as f:
            res = make_archive(f.name, "zip",
                               root_dir=path.parent, base_dir=path.name)
        # Remember the archive path in `g.temp_files`.
        # NOTE(review): presumably cleaned up by a hook elsewhere — confirm.
        archive = Path(f"{f.name}.zip")
        if "temp_files" not in g:
            g.temp_files = [archive]
        else:
            g.temp_files.append(archive)
        return send_file(res, as_attachment=True,
                         attachment_filename=f"{path.name}.zip")

    response: Response = jsonify(path_hierarchy(path, path))
    response.status_code = GET_STATUS_CODE
    return response
def validate_and_update_run_request(run_id: str, run_request: RunRequest,
                                    files: Dict[str, FileStorage]) \
        -> RunRequest:
    """
    Validate the form data of a run request and fill in derived fields.

    The given `run_request` is updated in place and returned. Steps:

    1. In `REGISTERED_ONLY_MODE`, reject requests that carry
       `workflow_url`.
    2. If `workflow_name` is given, copy `workflow_url`, `workflow_type`
       and `workflow_type_version` (and `workflow_attachment` when absent)
       from the workflow returned by `get_workflow`.
    3. Require `workflow_params`, `workflow_type`,
       `workflow_type_version`, `workflow_url` and `workflow_engine_name`.
    4. Decode `workflow_attachment` when it is a JSON string, defaulting
       to an empty list; default `workflow_engine_parameters` and `tags`
       to `"{}"`.
    5. Derive `workflow_name` from `tags` or from the last path segment
       of `workflow_url` when it was not given.
    6. If the `WORKFLOW_ATTACHMENT` config value is truthy, append one
       entry per uploaded `workflow_attachment[]` file, whose `file_url`
       points at `<endpoint>/runs/<run_id>/data/...`.
    7. Validate the workflow type/version and check `workflow_url` and
       `workflow_engine_name` with `validate_meta_charactors`.

    Every validation failure detected here is reported with
    `abort(400, ...)`, including `workflow_attachment` or `tags` values
    that are not valid JSON.
    """
    if current_app.config["REGISTERED_ONLY_MODE"] and \
            "workflow_url" in run_request:
        abort(
            400,
            "Currently, sapporo-service is running with "
            "registered_only_mode. "
            "Therefore, you need to specify a workflow using "
            "`workflow_name` field. A list of executable workflows can "
            "be retrieved requesting `GET /service-info`")

    if "workflow_name" in run_request:
        wf: Workflow = get_workflow(run_request["workflow_name"])
        run_request["workflow_url"] = wf["workflow_url"]
        run_request["workflow_type"] = wf["workflow_type"]
        run_request["workflow_type_version"] = wf["workflow_type_version"]
        if "workflow_attachment" not in run_request:
            run_request["workflow_attachment"] = wf["workflow_attachment"]

    for field in [
        "workflow_params",
        "workflow_type",
        "workflow_type_version",
        "workflow_url",
        "workflow_engine_name"
    ]:
        if field not in run_request:
            abort(400,
                  f"{field} not included in the form data of the request.")

    if "workflow_attachment" in run_request:
        if isinstance(run_request["workflow_attachment"], str):
            # Form data delivers the attachment list as a JSON string;
            # malformed input is a client error, not a server failure.
            try:
                run_request["workflow_attachment"] = \
                    json.loads(run_request["workflow_attachment"])  # type: ignore # noqa: E501
            except json.JSONDecodeError:
                abort(400,
                      "workflow_attachment in the form data of the "
                      "request is not valid JSON.")
    else:
        run_request["workflow_attachment"] = []

    if "workflow_engine_parameters" not in run_request:
        run_request["workflow_engine_parameters"] = "{}"
    if "tags" not in run_request:
        run_request["tags"] = "{}"

    if "workflow_name" not in run_request:
        try:
            tags = json.loads(run_request["tags"])
        except json.JSONDecodeError:
            abort(400,
                  "tags in the form data of the request is not valid JSON.")
        if "workflow_name" in tags:
            run_request["workflow_name"] = tags["workflow_name"]
        else:
            # Fall back to the last path segment of the workflow URL.
            run_request["workflow_name"] = \
                parse.urlparse(run_request["workflow_url"]).path.split("/")[-1]

    if current_app.config["WORKFLOW_ATTACHMENT"]:
        uploaded_files = \
            files.getlist("workflow_attachment[]")  # type: ignore
        exe_dir: Path = get_path(run_id, "exe_dir")
        host = request.host_url.strip("/")
        url_prefix = current_app.config['URL_PREFIX'].strip("/")
        endpoint = f"{host}/{url_prefix}".strip("/")
        base_remote_url = f"{endpoint}/runs/{run_id}/data/"
        for uploaded in uploaded_files:
            file_name: Path = secure_filepath(uploaded.filename)
            file_path: Path = exe_dir.joinpath(file_name).resolve()
            run_request["workflow_attachment"].append({
                "file_name": str(file_name),
                "file_url": base_remote_url +
                str(file_path.relative_to(exe_dir.parent))
            })

    validate_wf_type(run_request["workflow_type"],
                     run_request["workflow_type_version"])
    validate_meta_charactors("workflow_url", run_request["workflow_url"])
    validate_meta_charactors("workflow_engine_name",
                             run_request["workflow_engine_name"])

    return run_request
def test_contain_space() -> None:
    """Spaces in a file name come back as underscores."""
    sanitized = secure_filepath("My cool movie.mov")
    expected = Path("My_cool_movie.mov")
    assert sanitized == expected
def test_hidden_file() -> None:
    """The leading dot of a hidden file name is dropped."""
    sanitized = secure_filepath(".foo")
    expected = Path("foo")
    assert sanitized == expected
def test_DS_STORE() -> None:
    """The `._.` prefix of `._.DS_STORE` is stripped."""
    sanitized = secure_filepath("._.DS_STORE")
    expected = Path("DS_STORE")
    assert sanitized == expected
def test_only_double_dot() -> None:
    """A bare `..` sanitises to the empty path."""
    sanitized = secure_filepath("..")
    expected = Path("")
    assert sanitized == expected
def test_only_root() -> None:
    """A bare `/` sanitises to the empty path."""
    sanitized = secure_filepath("/")
    expected = Path("")
    assert sanitized == expected
def test_contain_ampersand() -> None:
    """Ampersands are removed, including components made only of them."""
    sanitized = secure_filepath("/&&/&foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
def test_contain_fullsize_ampersand() -> None:
    """A full-width ampersand (U+FF06) is stripped from the path."""
    # Spelled as an escape so the full-width character cannot be silently
    # normalised into an ASCII `&` by editors or tooling, which would turn
    # this case into a duplicate of the plain-ampersand test.
    sanitized = secure_filepath("/\uff06foo/bar")
    assert sanitized == Path("foo/bar")
def test_contain_pipe() -> None:
    """Pipe characters are removed, including components made only of them."""
    sanitized = secure_filepath("/||/|foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
def test_japanese_filename() -> None:
    """Japanese characters are dropped; the ASCII remainder is kept."""
    sanitized = secure_filepath("/フーfoo/バーbar")
    expected = Path("foo/bar")
    assert sanitized == expected
def test_contain_umlauts() -> None:
    """Umlauts become their base letters and spaces become underscores."""
    raw_name = "i contain cool \xfcml\xe4uts.txt"
    expected = Path("i_contain_cool_umlauts.txt")
    assert secure_filepath(raw_name) == expected
def test_root_dir() -> None:
    """An absolute path loses its leading slash and becomes relative."""
    sanitized = secure_filepath("/foo/bar")
    expected = Path("foo/bar")
    assert sanitized == expected
def test_prev_dir() -> None:
    """Leading `..` components are removed from a traversal attempt."""
    sanitized = secure_filepath("../../../etc/passwd")
    expected = Path("etc/passwd")
    assert sanitized == expected