def __init__(self,
             import_dirs: Optional[Sequence[Path]] = None,
             java_bin: Optional[Union[str, Path]] = None,
             java_args: Optional[str] = None,
             cromwell_jar_file: Optional[Union[str, Path]] = None,
             cromwell_config_file: Optional[Union[str, Path]] = None,
             cromwell_args: Optional[str] = None):
    super().__init__(java_bin, java_args)
    self._import_dirs = import_dirs
    self._cromwell_jar_file = self.resolve_jar_file(
        "cromwell*.jar", cromwell_jar_file, ENV_CROMWELL_JAR
    )
    if not cromwell_config_file:
        config_file = os.environ.get(ENV_CROMWELL_CONFIG)
        if config_file:
            cromwell_config_file = ensure_path(config_file)
    if cromwell_config_file:
        self._cromwell_config_file = ensure_path(
            cromwell_config_file, is_file=True, exists=True
        )
    else:
        self._cromwell_config_file = None
    if not self.java_args and self._cromwell_config_file:
        self.java_args = f"-Dconfig.file={self._cromwell_config_file}"
    self._cromwell_args = cromwell_args or os.environ.get(ENV_CROMWELL_ARGS)
def import_dirs(
    project_root: Union[str, Path],
    import_paths: Optional[Union[str, Path]]
) -> List[Union[str, Path]]:
    """
    Fixture that provides a list of directories containing WDL scripts to
    make available as imports. Uses the file provided by the `import_paths`
    fixture if it is not None; otherwise returns an empty list.

    Args:
        project_root: Project root directory.
        import_paths: File listing paths to imports, one per line.
    """
    if import_paths:
        import_paths = ensure_path(import_paths, canonicalize=True)
        if not import_paths.exists():
            raise FileNotFoundError(
                f"import_paths file {import_paths} does not exist"
            )
        paths = []
        with open(import_paths, "rt") as inp:
            for path_str in inp.read().splitlines(keepends=False):
                path = Path(path_str)
                if not path.is_absolute():
                    path = ensure_path(project_root / path)
                if not path.exists():
                    raise FileNotFoundError(f"Invalid import path: {path}")
                paths.append(path)
        return paths
    else:
        return []
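# Usage sketch for the fixture above: a test module can override the
# `import_paths` fixture in its conftest.py to point at a file that lists
# one import directory per line. The file name "tests/import_paths.txt" is
# illustrative, not part of the plugin's API.
import pytest

@pytest.fixture(scope="module")
def import_paths():
    return "tests/import_paths.txt"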
def __init__(self,
             project_root: Path,
             import_dirs: Optional[List[Path]] = None,
             java_bin: Optional[Union[str, Path]] = None,
             java_args: Optional[str] = None,
             cromwell_jar_file: Optional[Union[str, Path]] = None,
             cromwell_config_file: Optional[Union[str, Path]] = None,
             cromwell_args: Optional[str] = None):
    self.project_root = project_root
    self.import_dirs = import_dirs

    if not java_bin:
        java_home = os.environ.get(ENV_JAVA_HOME)
        if java_home:
            java_bin = Path(java_home) / "bin" / "java"
        else:
            java_bin = find_executable_path("java")
    if not java_bin:
        raise FileNotFoundError("Could not find java executable")
    self.java_bin = ensure_path(
        java_bin, exists=True, is_file=True, executable=True
    )

    if not cromwell_jar_file:
        cromwell_jar = os.environ.get(ENV_CROMWELL_JAR)
        if cromwell_jar:
            cromwell_jar_file = ensure_path(cromwell_jar)
        else:
            cromwell_jar_file = find_in_classpath("cromwell*.jar")
    if not cromwell_jar_file:
        raise FileNotFoundError("Could not find Cromwell JAR file")
    self.cromwell_jar_file = ensure_path(
        cromwell_jar_file, is_file=True, exists=True
    )

    if not cromwell_config_file:
        config_file = os.environ.get(ENV_CROMWELL_CONFIG)
        if config_file:
            cromwell_config_file = ensure_path(config_file)
    if cromwell_config_file:
        self.cromwell_config_file = ensure_path(
            cromwell_config_file, is_file=True, exists=True
        )
    else:
        self.cromwell_config_file = None

    if not java_args and self.cromwell_config_file:
        java_args = f"-Dconfig.file={self.cromwell_config_file}"
    self.java_args = java_args
    self.cromwell_args = cromwell_args or os.environ.get(ENV_CROMWELL_ARGS)
def path(self) -> Path:
    if not self.local_path.exists():
        if self.localizer:
            ensure_path(self.local_path, is_file=True, create=True)
            self.localizer.localize(self.local_path)
        else:
            raise RuntimeError(
                f"Localization to {self.local_path} is required but no "
                f"localizer is defined"
            )
    return self.local_path
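# Minimal sketch of the lazy-localization contract implemented by `path`
# above, assuming the DefaultDataFile and StringLocalizer classes defined
# elsewhere in this package; the temp-file path is illustrative.
data_file = DefaultDataFile(Path("/tmp/example.txt"), StringLocalizer("hello"))
local = data_file.path  # first access triggers localization
assert local.exists()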
def __init__(
    self,
    import_dirs: Optional[Sequence[Path]] = None,
    java_bin: Optional[Union[str, Path]] = None,
    java_args: Optional[str] = None,
    cromwell_jar_file: Optional[Union[str, Path]] = None,
    cromwell_configuration: Optional[Union[str, Path, dict]] = None,
    cromwell_args: Optional[str] = None,
    # deprecated
    cromwell_config_file: Optional[Union[str, Path]] = None,
):
    super().__init__(java_bin, java_args)
    self._import_dirs = import_dirs
    self._cromwell_jar_file = self.resolve_jar_file(
        "cromwell*.jar", cromwell_jar_file, ENV_CROMWELL_JAR
    )

    if cromwell_config_file:
        LOG.warning(
            "The 'cromwell_config_file' parameter is deprecated; please use "
            "'cromwell_configuration' instead."
        )
        if not cromwell_configuration:
            cromwell_configuration = cromwell_config_file

    if not cromwell_configuration:
        config_file = os.environ.get(ENV_CROMWELL_CONFIG)
        if config_file:
            cromwell_configuration = ensure_path(config_file)

    if cromwell_configuration:
        if self.java_args:
            LOG.warning(
                "'cromwell_configuration' is ignored when 'java_args' are set"
            )
        else:
            if isinstance(cromwell_configuration, dict):
                # Write the configuration dict to a temporary JSON file
                # (JSON is a subset of the HOCON format Cromwell reads).
                cromwell_config_file = Path(
                    tempfile.mkstemp(suffix=".json")[1]
                )
                with open(cromwell_config_file, "wt") as out:
                    json.dump(cromwell_configuration, out)
            else:
                cromwell_config_file = ensure_path(
                    cromwell_configuration, is_file=True, exists=True
                )
            self.java_args = f"-Dconfig.file={cromwell_config_file}"

    self._cromwell_args = cromwell_args or os.environ.get(ENV_CROMWELL_ARGS)
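# Sketch: passing the Cromwell configuration as a dict. It is written to a
# temporary JSON file and handed to Cromwell via -Dconfig.file. The executor
# class name is an assumption for illustration.
executor = CromwellExecutor(
    cromwell_configuration={"backend": {"default": "Local"}}
)
# roughly equivalent to: java -Dconfig.file=<tmp>.json -jar cromwell*.jar ...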
def runtest(self):
    # Get/create DataManager
    if self._data:
        config = self._fixture_request.getfixturevalue("user_config")
        data_resolver = DataResolver(self._data, config)
        data_dirs = DataDirs(
            ensure_path(self._fixture_request.fspath.dirpath(),
                        canonicalize=True),
            function=self.name,
            module=None,  # TODO: support a top-level key for module name
            cls=None,  # TODO: support test groupings
        )
        workflow_data = DataManager(data_resolver, data_dirs)
    else:
        workflow_data = self._fixture_request.getfixturevalue("workflow_data")

    # Build the arguments to workflow_runner
    workflow_runner_kwargs = self._workflow_runner_kwargs

    # Resolve test data requests in the inputs and outputs
    if self._inputs:
        workflow_runner_kwargs["inputs"] = _resolve_test_data(
            self._inputs, workflow_data
        )
    if self._expected:
        workflow_runner_kwargs["expected"] = _resolve_test_data(
            self._expected, workflow_data
        )

    # Run the test
    workflow_runner = self._fixture_request.getfixturevalue("workflow_runner")
    return workflow_runner(self._wdl, **workflow_runner_kwargs)
def workflow_data_descriptors(
    request: FixtureRequest,
    project_root: Union[str, Path],
    workflow_data_descriptor_file: Union[str, Path],
) -> dict:
    """
    Fixture that provides a mapping of test data names to values. If
    `workflow_data_descriptor_file` is relative, it is searched first
    relative to the current test context directory and then relative to
    the project root.

    Args:
        request: A FixtureRequest object.
        project_root: Project root directory.
        workflow_data_descriptor_file: Path to the data descriptor JSON
            (or YAML) file.

    Returns:
        A dict with keys as test data names and each value either a
        primitive, a map describing a data file, or a DataFile object.
    """
    search_paths = [Path(request.fspath.dirpath()), project_root]
    workflow_data_descriptor_path = ensure_path(
        workflow_data_descriptor_file,
        search_paths=search_paths,
        is_file=True,
        exists=True,
    )
    with open(workflow_data_descriptor_path, "rt") as inp:
        if yaml and workflow_data_descriptor_path.suffix == ".yaml":
            # safe_load avoids executing arbitrary tags in the YAML file
            return yaml.safe_load(inp)
        else:
            return json.load(inp)
def get_workflow(project_root: Path,
                 wdl_file: Union[str, Path],
                 workflow_name: Optional[str] = None) -> Tuple[Path, str]:
    """
    Resolve the WDL file and workflow name.

    TODO: if `workflow_name` is None, parse the WDL file and extract the
     name of the workflow.

    Args:
        project_root: The root directory to which `wdl_file` might be
            relative.
        wdl_file: Path to the WDL file.
        workflow_name: The workflow name; if None, the filename without
            ".wdl" extension is used.

    Returns:
        A tuple (wdl_path, workflow_name)
    """
    wdl_path = ensure_path(wdl_file, project_root, canonicalize=True)
    if not wdl_path.exists():
        raise FileNotFoundError(f"WDL file not found at path {wdl_path}")
    if not workflow_name:
        workflow_name = safe_string(wdl_path.stem)
    return wdl_path, workflow_name
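# Example (paths assumed): resolve a WDL file relative to the project root;
# with no explicit name, the workflow name is the sanitized file stem.
wdl_path, workflow_name = get_workflow(Path("/repo"), "workflows/align.wdl")
# workflow_name == safe_string("align") == "align"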
def default_user_config_file() -> Optional[Path]:
    config_file = os.environ.get(ENV_USER_CONFIG)
    config_path = None
    if config_file:
        config_path = ensure_path(config_file)
    else:
        default_config_paths = [
            Path.home() / f"{DEFAULT_USER_CONFIG_FILE}.json",
            Path.home() / f".{DEFAULT_USER_CONFIG_FILE}.json",
        ]
        if yaml:
            default_config_paths.extend([
                Path.home() / f"{DEFAULT_USER_CONFIG_FILE}.yaml",
                Path.home() / f".{DEFAULT_USER_CONFIG_FILE}.yaml",
            ])
        for default_config_path in default_config_paths:
            if default_config_path.exists():
                config_path = default_config_path
                break
    if config_path and not config_path.exists():
        raise FileNotFoundError(f"Config file {config_path} does not exist")
    return config_path
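# Resolution order illustrated (path assumed to exist): the ENV_USER_CONFIG
# environment variable wins over the default file names in $HOME; the JSON
# candidates are always searched, YAML only when the yaml package imports.
os.environ[ENV_USER_CONFIG] = "/etc/pytest_wdl/config.json"
config = default_user_config_file()  # -> Path("/etc/pytest_wdl/config.json")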
def resolve_jar_file(
    file_name_pattern: str,
    jar_path: Optional[Path] = None,
    env_var: Optional[str] = None,
) -> Path:
    if not jar_path:
        path_str = None
        if env_var:
            path_str = os.environ.get(env_var)
        if path_str:
            jar_path = ensure_path(path_str)
        else:
            jar_path = find_in_classpath(file_name_pattern)
    if not jar_path:
        raise FileNotFoundError(f"Could not find JAR file {file_name_pattern}")
    return ensure_path(jar_path, is_file=True, exists=True)
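# Sketch of the three-step resolution above: explicit path first, then the
# environment variable, then a classpath scan. Values are illustrative.
jar = resolve_jar_file("cromwell*.jar", None, ENV_CROMWELL_JAR)
# raises FileNotFoundError if no candidate matches "cromwell*.jar"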
def _get_workflow_imports(self, imports_file: Optional[Path] = None) -> Path:
    """
    Creates a ZIP file with all WDL files to be imported.

    Args:
        imports_file: Text file naming import directories/files - one per
            line.

    Returns:
        Path to the ZIP file.
    """
    write_imports = bool(self._import_dirs)
    imports_path = None
    if imports_file:
        imports_path = ensure_path(imports_file)
        if imports_path.exists():
            write_imports = False
    if write_imports and self._import_dirs:
        imports = [
            wdl
            for path in self._import_dirs
            for wdl in glob.glob(str(path / "*.wdl"))
        ]
        if imports:
            if imports_path:
                ensure_path(imports_path, is_file=True, create=True)
            else:
                imports_path = Path(tempfile.mkstemp(suffix=".zip")[1])
            imports_str = " ".join(imports)
            LOG.info(
                f"Writing imports {imports_str} to zip file {imports_path}"
            )
            exe = subby.run(
                f"zip -j - {imports_str}",
                mode=bytes,
                stdout=imports_path,
                raise_on_error=False,
            )
            if not exe.ok:
                raise Exception(
                    f"Error creating imports zip file; stdout={exe.output}; "
                    f"stderr={exe.error}"
                )
    return imports_path
def get_workflow_imports(import_dirs: Optional[List[Path]] = None,
                         imports_file: Optional[Path] = None) -> Path:
    """
    Creates a ZIP file with all WDL files to be imported.

    Args:
        import_dirs: Directories from which to import WDL files.
        imports_file: Text file naming import directories/files - one per
            line.

    Returns:
        Path to the ZIP file.
    """
    write_imports = bool(import_dirs)
    imports_path = None
    if imports_file:
        imports_path = ensure_path(imports_file)
        if imports_path.exists():
            write_imports = False
    if write_imports and import_dirs:
        imports = [
            wdl
            for path in import_dirs
            for wdl in glob.glob(str(path / "*.wdl"))
        ]
        if imports:
            if imports_path:
                ensure_path(imports_path, is_file=True, create=True)
            else:
                imports_path = Path(tempfile.mkstemp(suffix=".zip")[1])
            imports_str = " ".join(imports)
            LOG.info(
                f"Writing imports {imports_str} to zip file {imports_path}"
            )
            exe = delegator.run(
                f"zip -j - {imports_str} > {imports_path}", block=True
            )
            if not exe.ok:
                raise Exception(
                    f"Error creating imports zip file; stdout={exe.out}; "
                    f"stderr={exe.err}"
                )
    return imports_path
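# Sketch (directories assumed to exist and contain .wdl files): both
# implementations above flatten ("zip -j") every *.wdl from the import
# directories into a single archive that executors pass as the imports zip.
imports_zip = get_workflow_imports(
    import_dirs=[Path("wdl/lib"), Path("wdl/tasks")]
)
# imports_zip is a temporary .zip unless imports_file was supplied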
def __init__(
    self,
    import_dirs: Optional[Sequence[Path]] = None,
    java_bin: Optional[Union[str, Path]] = None,
    java_args: Optional[str] = None,
    dxwdl_jar_file: Optional[Union[str, Path]] = None,
    dxwdl_cache_dir: Optional[Union[str, Path]] = None,
):
    super().__init__(java_bin, java_args)
    self._import_dirs = import_dirs
    self._dxwdl_jar_file = self.resolve_jar_file(
        "dxWDL*.jar", dxwdl_jar_file, ENV_DXWDL_JAR
    )
    if dxwdl_cache_dir:
        self._dxwdl_cache_dir = ensure_path(dxwdl_cache_dir)
        self._cleanup_cache = False
    else:
        self._dxwdl_cache_dir = ensure_path(tempfile.mkdtemp())
        self._cleanup_cache = True
def _run_workflow(wdl_script: Union[str, Path],
                  *args,
                  inputs: Optional[dict] = None,
                  expected: Optional[dict] = None,
                  executors: Optional[Sequence[str]] = None,
                  **kwargs):
    inputs, expected, kwargs = _reformat_args(
        list(args), inputs=inputs, expected=expected, **kwargs
    )
    search_paths = [Path(request.fspath.dirpath()), project_root]
    wdl_path = ensure_path(wdl_script, search_paths, is_file=True, exists=True)
    import_dir_paths = [
        ensure_path(d, is_file=False, exists=True) for d in import_dirs
    ]
    if not executors:
        executors = default_executors

    def _run_test(_executor_name):
        executor = create_executor(
            _executor_name, import_dir_paths, user_config
        )
        with context_dir(user_config.default_execution_dir, change_dir=True):
            executor.run_workflow(
                wdl_path, inputs=inputs, expected=expected, **kwargs
            )

    if len(executors) == 0:
        raise RuntimeError("At least one executor must be specified")
    elif len(executors) == 1:
        _run_test(executors[0])
    else:
        for executor_name in executors:
            with subtests.test(msg=executor_name, executor_name=executor_name):
                _run_test(executor_name)
def get_workflow_inputs(
    workflow_name: str,
    inputs_dict: Optional[dict] = None,
    inputs_file: Optional[Path] = None
) -> Tuple[dict, Path]:
    """
    Persist workflow inputs to a file, or load workflow inputs from a file.

    Args:
        workflow_name: Name of the workflow; used to prefix the input
            parameters when creating the inputs file from the inputs dict.
        inputs_dict: Dict of input names/values.
        inputs_file: JSON file with workflow inputs.

    Returns:
        A tuple (inputs_dict, inputs_file)
    """
    if inputs_file:
        inputs_file = ensure_path(inputs_file)
        if inputs_file.exists():
            with open(inputs_file, "rt") as inp:
                inputs_dict = json.load(inp)
                return inputs_dict, inputs_file
    if inputs_dict:
        inputs_dict = dict(
            (
                f"{workflow_name}.{key}",
                value.path if isinstance(value, DataFile) else value,
            )
            for key, value in inputs_dict.items()
        )
        if inputs_file:
            inputs_file = ensure_path(inputs_file, is_file=True, create=True)
        else:
            inputs_file = Path(tempfile.mkstemp(suffix=".json")[1])
        with open(inputs_file, "wt") as out:
            json.dump(inputs_dict, out, default=str)
    return inputs_dict, inputs_file
def workflow_data_descriptors(
    workflow_data_descriptor_file: Union[str, Path]
) -> dict:
    """
    Fixture that provides a mapping of test data names to values.

    Args:
        workflow_data_descriptor_file: Path to the data descriptor JSON file.

    Returns:
        A dict with keys as test data names and each value either a
        primitive, a map describing a data file, or a DataFile object.
    """
    with open(ensure_path(workflow_data_descriptor_file), "rt") as inp:
        return json.load(inp)
def get_workflow_inputs(inputs_dict: Optional[dict] = None,
                        inputs_file: Optional[Path] = None,
                        namespace: Optional[str] = None) -> Tuple[dict, Path]:
    """
    Persist workflow inputs to a file, or load workflow inputs from a file.

    Args:
        inputs_dict: Dict of input names/values.
        inputs_file: JSON file with workflow inputs.
        namespace: Name of the workflow; used to prefix the input parameters
            when creating the inputs file from the inputs dict.

    Returns:
        A tuple (inputs_dict, inputs_file)
    """
    if inputs_file:
        inputs_file = ensure_path(inputs_file)
        if inputs_file.exists():
            with open(inputs_file, "rt") as inp:
                inputs_dict = json.load(inp)
                return inputs_dict, inputs_file
    if inputs_dict:
        prefix = f"{namespace}." if namespace else ""
        inputs_dict = dict(
            (f"{prefix}{key}", make_serializable(value))
            for key, value in inputs_dict.items()
        )
        if inputs_file:
            inputs_file = ensure_path(inputs_file, is_file=True, create=True)
        else:
            inputs_file = Path(tempfile.mkstemp(suffix=".json")[1])
        with open(inputs_file, "wt") as out:
            json.dump(inputs_dict, out, default=str)
    return inputs_dict, inputs_file
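# Round-trip sketch for the function above (names assumed): inputs are
# prefixed with the namespace and written to a generated JSON file.
inputs, inputs_path = get_workflow_inputs(
    {"reads": "r1.fq"}, namespace="align"
)
# inputs == {"align.reads": "r1.fq"}; inputs_path is a temp .json file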
def read_write_inputs(
    inputs_file: Optional[Union[str, Path]] = None,
    inputs_dict: Optional[dict] = None,
    inputs_formatter: Optional[InputsFormatter] = InputsFormatter.get_instance(),
    write_formatted_inputs: bool = True,
    **kwargs
) -> Tuple[dict, Optional[Path]]:
    """
    If `inputs_file` is specified and it exists, read its contents.
    Otherwise, if `inputs_dict` is specified, format it using
    `inputs_formatter` (if specified) and write it to `inputs_file` or a
    temporary file.

    Args:
        inputs_file: Path of the inputs JSON file to read or write.
        inputs_dict: Dict of input names/values.
        inputs_formatter: The InputsFormatter to use to format `inputs_dict`.
        write_formatted_inputs: Whether to write the formatted inputs to
            `inputs_file` (or a temporary file).
        kwargs: Additional keyword arguments to pass to the formatter.

    Returns:
        The (formatted) inputs dict and the resolved inputs file. If both
        `inputs_dict` and `inputs_file` are None, returns `({}, None)`.
    """
    if inputs_file:
        inputs_file = ensure_path(inputs_file, is_file=True, create=True)
        if inputs_file.exists():
            with open(inputs_file, "rt") as inp:
                inputs_dict_from_file = json.load(inp)
                return inputs_dict_from_file, inputs_file
    if inputs_dict:
        inputs_dict = inputs_formatter.format_inputs(inputs_dict, **kwargs)
        if write_formatted_inputs:
            if not inputs_file:
                inputs_file = Path(tempfile.mkstemp(suffix=".json")[1])
            with open(inputs_file, "wt") as out:
                json.dump(inputs_dict, out, default=str)
        return inputs_dict, inputs_file
    return {}, None
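# Behavior sketch for read_write_inputs (no inputs_file given, so the dict
# branch is taken and a temporary file is created):
formatted, path = read_write_inputs(inputs_dict={"x": 1})
# formatted is the InputsFormatter output; path names the written JSON file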
def user_config_file() -> Optional[Path]:
    """
    Fixture that provides the value of the 'user_config' environment
    variable. If not specified, looks in the default location
    ($HOME/pytest_user_config.json).

    Returns:
        Path to the config file, or None if not specified.
    """
    config_file = os.environ.get(ENV_USER_CONFIG)
    config_path = None
    if config_file:
        config_path = ensure_path(config_file)
    else:
        default_config_path = Path.home() / DEFAULT_USER_CONFIG_FILE
        if default_config_path.exists():
            config_path = default_config_path
    if config_path and not config_path.exists():
        raise FileNotFoundError(f"Config file {config_path} does not exist")
    return config_path
def __init__(self,
             java_bin: Optional[Union[str, Path]] = None,
             java_args: Optional[str] = None):
    if not java_bin:
        java_home = os.environ.get(ENV_JAVA_HOME)
        if java_home:
            java_bin = Path(java_home) / "bin" / "java"
        else:
            java_bin = find_executable_path("java")
    if not java_bin:
        raise FileNotFoundError("Could not find java executable")
    self.java_bin = ensure_path(
        java_bin, exists=True, is_file=True, executable=True
    )
    self.java_args = java_args or os.environ.get(ENV_JAVA_ARGS)
def workflow_runner(
    request: FixtureRequest,
    project_root: Union[str, Path],
    import_dirs: List[Union[str, Path]],
    user_config: UserConfiguration,
    default_executors: Sequence[str],
    subtests: SubTests,
):
    """
    Provides a callable that runs a workflow. The callable has the same
    signature as `Executor.run_workflow`, but takes an additional keyword
    argument `executors`, a sequence of strings, which allows overriding
    the names of the executors to use. If multiple executors are specified,
    the tests are run using the `subtests` fixture of the `pytest-subtests`
    plugin.

    Args:
        request: A FixtureRequest object.
        project_root: Project root directory.
        import_dirs: Directories from which to import WDL scripts.
        user_config: A UserConfiguration object.
        default_executors: Names of executors to use when executor name
            isn't passed to the `workflow_runner` callable.
        subtests: A SubTests object.

    Returns:
        A `WorkflowRunner` callable. Invoking it yields, for each executor,
        a tuple `(executor_name, execution_dir, outputs)`, where
        `execution_dir` is the root directory where the task/workflow was
        run (the structure of the directory is executor-dependent) and
        `outputs` is a dict of the task/workflow outputs.
    """
    return WorkflowRunner(
        default_executors=default_executors,
        wdl_search_paths=[Path(request.fspath.dirpath()), project_root],
        import_dirs=[
            ensure_path(d, is_file=False, exists=True) for d in import_dirs
        ],
        user_config=user_config,
        subtests=subtests,
    )
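# Hedged usage sketch of the workflow_runner fixture in a test module
# (the WDL file and input/output names are illustrative):
def test_align(workflow_runner):
    workflow_runner(
        "align.wdl",
        inputs={"reads": "r1.fq"},
        expected={"bam": "expected.bam"},
        executors=["cromwell"],  # overrides the configured defaults
    )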
def workflow_data(request: FixtureRequest,
                  workflow_data_resolver: DataResolver) -> DataManager:
    """
    Provides an accessor for test data files, which may be local or in a
    remote repository.

    Args:
        request: FixtureRequest object
        workflow_data_resolver: Module-level test data configuration

    Examples:
        def workflow_data_descriptor_file():
            return "tests/test_data.json"

        def test_workflow(workflow_data):
            print(workflow_data["myfile"])
    """
    datadirs = DataDirs(
        ensure_path(request.fspath.dirpath(), canonicalize=True),
        request.module,
        request.function,
        request.cls,
    )
    return DataManager(workflow_data_resolver, datadirs)
def create_data_file(
    user_config: UserConfiguration,
    type: Optional[Union[str, dict]] = DEFAULT_TYPE,
    name: Optional[str] = None,
    path: Optional[Union[str, Path]] = None,
    url: Optional[str] = None,
    contents: Optional[Union[str, dict]] = None,
    env: Optional[str] = None,
    http_headers: Optional[dict] = None,
    digests: Optional[dict] = None,
    datadirs: Optional[DataDirs] = None,
    **kwargs
) -> DataFile:
    if isinstance(type, dict):
        data_file_opts = cast(dict, type)
        type = data_file_opts.pop("name")
    else:
        data_file_opts = {}
    data_file_opts.update(kwargs)

    local_path = None
    localizer = None

    if path:
        local_path = ensure_path(path, [user_config.cache_dir])

    if local_path and local_path.exists():
        pass
    elif env and env in os.environ:
        env_path = ensure_path(os.environ[env], exists=True)
        if not local_path:
            local_path = env_path
        else:
            localizer = LinkLocalizer(env_path)
    elif url:
        localizer = UrlLocalizer(url, user_config, http_headers, digests)
        if not local_path:
            if name:
                local_path = ensure_path(user_config.cache_dir / name)
            else:
                filename = url.rsplit("/", 1)[1]
                local_path = ensure_path(user_config.cache_dir / filename)
    elif contents:
        if isinstance(contents, str):
            localizer = StringLocalizer(cast(str, contents))
        else:
            localizer = JsonLocalizer(cast(dict, contents))
            if type == DEFAULT_TYPE:
                type = "json"
        if not local_path:
            if name:
                local_path = ensure_path(user_config.cache_dir / name)
            else:
                local_path = ensure_path(
                    tempfile.mktemp(dir=user_config.cache_dir)
                )
    elif name and datadirs:
        for dd in datadirs.paths:
            dd_path = dd / name
            if dd_path.exists():
                break
        else:
            raise FileNotFoundError(
                f"File {name} not found in any of the following datadirs: "
                f"{datadirs.paths}"
            )
        if not local_path:
            local_path = dd_path
        else:
            localizer = LinkLocalizer(dd_path)
    else:
        raise FileNotFoundError(
            f"File {path or name} does not exist. Either a url, file contents, "
            f"or a local file must be provided."
        )

    data_file_class = DATA_TYPES.get(type, DefaultDataFile)
    return data_file_class(local_path, localizer, **data_file_opts)
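# Sketches of the descriptor styles create_data_file accepts (values are
# illustrative); user_config is a UserConfiguration instance:
f1 = create_data_file(user_config, path="data/reads.fq")      # local file
f2 = create_data_file(user_config, url="https://host/x.vcf")  # lazy download
f3 = create_data_file(user_config, contents={"a": 1})         # inline JSON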
def __init__(
    self,
    config_file: Optional[Path] = None,
    cache_dir: Optional[Path] = None,
    remove_cache_dir: Optional[bool] = None,
    execution_dir: Optional[Path] = None,
    proxies: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
    http_headers: Optional[List[dict]] = None,
    show_progress: Optional[bool] = None,
    executor_defaults: Optional[Dict[str, dict]] = None,
):
    if config_file:
        with open(config_file, "rt") as inp:
            defaults = json.load(inp)
    else:
        defaults = {}

    if not cache_dir:
        cache_dir_str = os.environ.get(
            ENV_CACHE_DIR, defaults.get(KEY_CACHE_DIR)
        )
        if cache_dir_str:
            cache_dir = ensure_path(cache_dir_str)
    if cache_dir:
        self.cache_dir = ensure_path(cache_dir, is_file=False, create=True)
        if remove_cache_dir is None:
            remove_cache_dir = False
    else:
        self.cache_dir = Path(tempfile.mkdtemp())
        if remove_cache_dir is None:
            remove_cache_dir = True
    self.remove_cache_dir = remove_cache_dir

    if not execution_dir:
        execution_dir_str = os.environ.get(
            ENV_EXECUTION_DIR, defaults.get(KEY_EXECUTION_DIR)
        )
        if execution_dir_str:
            execution_dir = ensure_path(execution_dir_str)
    if execution_dir:
        self.default_execution_dir = ensure_path(
            execution_dir, is_file=False, create=True
        )
    else:
        self.default_execution_dir = None

    if not proxies and KEY_PROXIES in defaults:
        proxies = env_map(defaults[KEY_PROXIES])
    self.proxies = proxies or {}

    if not http_headers and KEY_HTTP_HEADERS in defaults:
        http_headers = defaults[KEY_HTTP_HEADERS]
        for d in http_headers:
            if "pattern" in d:
                d["pattern"] = re.compile(d.pop("pattern"))
    self.default_http_headers = http_headers or []

    self.show_progress = show_progress
    if self.show_progress is None:
        self.show_progress = defaults.get(KEY_SHOW_PROGRESS)

    self.executor_defaults = executor_defaults or {}
    if "executors" in defaults:
        for name, d in defaults["executors"].items():
            if name not in self.executor_defaults:
                self.executor_defaults[name] = d
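# Illustrative user-config JSON consumed by __init__ above. The key strings
# mirror the KEY_* constants used in the code; the exact spellings and the
# http_headers sub-keys are assumptions for illustration.
# {
#   "cache_dir": "~/.pytest_wdl_cache",
#   "execution_dir": "/tmp/pytest_wdl",
#   "proxies": {"http": {"env": "HTTP_PROXY"}},
#   "http_headers": [{"pattern": "https://.*", "name": "X-Token", "env": "TOKEN"}],
#   "show_progress": true,
#   "executors": {"cromwell": {"java_args": "-Xmx4g"}}
# }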
def create_data_file(self,
                     type: Optional[str] = "default",
                     name: Optional[str] = None,
                     path: Optional[str] = None,
                     url: Optional[str] = None,
                     contents: Optional[str] = None,
                     env: Optional[str] = None,
                     datadirs: Optional[DataDirs] = None,
                     http_headers: Optional[dict] = None,
                     **kwargs) -> DataFile:
    data_file_class = DATA_TYPES.get(type, DataFile)
    local_path = None
    localizer = None

    if path:
        local_path = ensure_path(path, self.user_config.cache_dir)

    if local_path and local_path.exists():
        pass
    elif env and env in os.environ:
        env_path = ensure_path(os.environ[env], exists=True)
        if not local_path:
            local_path = env_path
        else:
            localizer = LinkLocalizer(env_path)
    elif url:
        localizer = UrlLocalizer(url, self.user_config, http_headers)
        if not local_path:
            if name:
                local_path = ensure_path(self.user_config.cache_dir / name)
            else:
                filename = url.rsplit("/", 1)[1]
                local_path = ensure_path(
                    self.user_config.cache_dir / filename
                )
    elif contents:
        localizer = StringLocalizer(contents)
        if not local_path:
            if name:
                local_path = ensure_path(self.user_config.cache_dir / name)
            else:
                local_path = ensure_path(
                    tempfile.mktemp(dir=self.user_config.cache_dir)
                )
    elif name and datadirs:
        for dd in datadirs.paths:
            dd_path = dd / name
            if dd_path.exists():
                break
        else:
            raise FileNotFoundError(
                f"File {name} not found in any of the following datadirs: "
                f"{datadirs.paths}"
            )
        if not local_path:
            local_path = dd_path
        else:
            localizer = LinkLocalizer(dd_path)
    else:
        raise FileNotFoundError(
            f"File {path or name} does not exist. Either a url, file contents, "
            f"or a local file must be provided."
        )

    return data_file_class(local_path, localizer, **kwargs)
def test_ensure_path():
    cwd = Path.cwd()
    assert ensure_path(cwd) == cwd
    cwd_str = str(cwd)
    assert ensure_path(cwd_str) == cwd
    assert ensure_path(cwd.name, [cwd.parent]) == cwd

    home = Path.home()
    assert ensure_path("~", canonicalize=False) == Path("~")
    assert ensure_path("~", canonicalize=True) == home

    with tempdir() as d:
        with pytest.raises(FileNotFoundError):
            ensure_path(d / "foo", exists=True)

        foo = d / "foo"
        assert not foo.exists()
        bar = foo / "bar"
        ensure_path(bar, is_file=True, create=True)
        assert foo.exists()
        with open(bar, "wt") as out:
            out.write("foo")
        ensure_path(bar, exists=True, is_file=True)
        with pytest.raises(NotADirectoryError):
            ensure_path(bar, exists=True, is_file=False)
        with pytest.raises(OSError):
            ensure_path(bar, exists=True, is_file=True, executable=True)
        os.chmod(bar, bar.stat().st_mode | stat.S_IEXEC)
        ensure_path(bar, exists=True, is_file=True, executable=True)

        baz = d / "baz"
        assert not baz.exists()
        ensure_path(baz, is_file=False, create=True)
        assert baz.exists()
        assert baz.is_dir()
        with pytest.raises(FileExistsError):
            ensure_path(baz, exists=False)
        with pytest.raises(IsADirectoryError):
            ensure_path(baz, exists=True, is_file=True)
def _args(
    self,
    wdl_script: Union[str, Path],
    *args,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    executors: Optional[Sequence[str]] = None,
    **kwargs,
) -> Tuple[Sequence[str], dict]:
    """
    Handle multiple different call signatures.

    Args:
        wdl_script: Path to the WDL script to run.
        args: Positional arguments. Supports backward-compatibility for
            workflows using the old `run_workflow` signature in which the
            second argument was the workflow name. This will be removed in
            the next major version.
        inputs: Dict of workflow inputs.
        expected: Dict of expected outputs.
        executors: Names of the executors to use; defaults to the
            configured default executors.
        kwargs: Additional keyword arguments.

    Returns:
        Tuple of (executors, call_kwargs).
    """
    wdl_path = ensure_path(
        wdl_script, self._wdl_search_paths, is_file=True, exists=True
    )

    if args:
        args_list = list(args)
        if isinstance(args_list[0], str):
            kwargs["workflow_name"] = args_list.pop(0)
        if args_list:
            if inputs:
                raise TypeError("Multiple values for argument 'inputs'")
            inputs = args_list.pop(0)
        if args_list:
            if expected:
                raise TypeError("Multiple values for argument 'expected'")
            expected = args_list.pop(0)
        if args_list:
            raise TypeError("Too many arguments")

    if not executors:
        executors = self._default_executors
    if len(executors) == 0:
        raise RuntimeError("At least one executor must be specified")

    call_args = {
        "wdl_path": wdl_path,
        "inputs": inputs,
        "expected": expected,
    }
    call_args.update(kwargs)

    return executors, call_args
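# The two call shapes _args normalizes (sketch; runner is a WorkflowRunner
# instance and the argument values are illustrative):
runner("align.wdl", inputs={"x": 1}, expected={"y": 2})  # current style
runner("align.wdl", "align", {"x": 1}, {"y": 2})         # deprecated style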
def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs
) -> dict:
    # TODO: handle "task_name" kwarg - run app instead of workflow
    wdl_doc = parse_wdl(wdl_path, self._import_dirs, **kwargs)

    if not wdl_doc.workflow:
        raise ValueError(
            "Currently, the dxWDL executor only supports executing "
            "workflows, not individual tasks"
        )

    workflow_name = wdl_doc.workflow.name

    if "workflow_name" in kwargs and workflow_name != kwargs["workflow_name"]:
        raise ValueError(
            f"The workflow name '{workflow_name}' does not match the value "
            f"of the 'workflow_name' parameter '{kwargs['workflow_name']}'"
        )

    namespace = kwargs.get("stage_id", "stage-common")

    inputs_dict = None
    if "inputs_file" in kwargs:
        inputs_file = ensure_path(kwargs["inputs_file"])
        if inputs_file.exists():
            with open(inputs_file, "rt") as inp:
                inputs_dict = json.load(inp)

    if not inputs_dict:
        workflow_inputs = wdl_doc.workflow.available_inputs
        if workflow_inputs:
            dx_inputs_formatter = DxInputsFormatter(wdl_doc, **kwargs)
            inputs_dict = dx_inputs_formatter.format_inputs(inputs, namespace)
        else:
            inputs_dict = {}

    try:
        with login():
            workflow = self._resolve_workflow(wdl_path, workflow_name, kwargs)
            analysis = workflow.run(inputs_dict)
            try:
                analysis.wait_on_done()
                outputs = self._get_analysis_outputs(
                    analysis, expected.keys() if expected else ()
                )
                if expected:
                    self._validate_outputs(outputs, expected, OUTPUT_STAGE)
                return outputs
            except dxpy.exceptions.DXJobFailureError:
                raise ExecutionFailedError(
                    "dxWDL",
                    workflow_name,
                    analysis.describe()["state"],
                    inputs_dict,
                    **self._get_failed_task(analysis)
                )
            finally:
                if self._cleanup_cache:
                    shutil.rmtree(self._dxwdl_cache_dir)
    except dxpy.exceptions.InvalidAuthentication as ierr:
        raise ExecutorError(
            "dxwdl", "Invalid DNAnexus credentials/token"
        ) from ierr
    except dxpy.exceptions.ResourceNotFound as rerr:
        raise ExecutorError(
            "dxwdl", "Required resource was not found"
        ) from rerr
    except dxpy.exceptions.PermissionDenied as perr:
        raise ExecutorError(
            "dxwdl", "You must have at least CONTRIBUTE permission"
        ) from perr