def load(*packages):
    """
    Loads contrib *packages* and adds them to the law namespace. Example:

    .. code-block:: python

        import law
        law.contrib.load("docker")

        law.docker.DockerSandbox(...)

    It is ensured that packages are loaded only once.
    """
    for pkg in flatten(packages):
        if pkg in loaded_packages:
            logger.debug("skip contrib package '{}', already loaded".format(pkg))
            continue
        elif not os.path.exists(law_src_path("contrib", pkg, "__init__.py")):
            raise Exception("contrib package '{}' does not exist".format(pkg))
        elif getattr(law, pkg, None):
            raise Exception(
                "cannot load contrib package '{}', attribute with that name already "
                "exists in the law module".format(pkg))

        mod = __import__("law.contrib.{}".format(pkg), globals(), locals(), [pkg])
        setattr(law, pkg, mod)
        law.__all__.append(pkg)
        loaded_packages.append(pkg)

        logger.debug("loaded contrib package '{}'".format(pkg))

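# A hedged usage sketch (assuming the "docker" and "htcondor" contrib packages exist
# in this law installation): load() accepts any number of package names, flattens
# nested sequences, and silently skips packages that were already loaded.
import law

law.contrib.load("docker", "htcondor")
law.contrib.load(["docker"])  # no-op, already loaded

print(law.docker)  # -> <module 'law.contrib.docker' ...>
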
def arc_wrapper_file(self):
    return law_src_path("job", "bash_wrapper.sh")

def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    _postfix = lambda path: self.job_file_factory.postfix_file(path, postfix)
    pf = lambda s: "__law_job_postfix__:{}".format(s)

    # collect task parameters
    proxy_cmd = ProxyCommand(task.as_branch(branches[0]), exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"])
    if task.lsf_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.lsf_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.lsf_wrapper_file())
    config.command = "bash {} {}".format(
        _postfix(os.path.basename(wrapper_file)), job_args.join())

    # meta infos
    config.job_name = task.task_id
    config.emails = True

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]

    # add the bootstrap file
    bootstrap_file = task.lsf_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.lsf_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # logging
    # we do not use lsf's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.custom_log_file = log_file
        config.render_variables["log_file"] = pf(log_file)

    # we can use lsf's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.lsf_output_directory()
    if isinstance(output_dir, LocalDirectoryTarget):
        config.absolute_paths = True
        config.cwd = output_dir.path
    else:
        del config.output_files[:]

    # task hook
    config = task.lsf_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # build the job file and get the sanitized config
    job_file, config = self.job_file_factory(**config.__dict__)

    # determine the absolute custom log file if set
    abs_log_file = None
    if config.custom_log_file and isinstance(output_dir, LocalDirectoryTarget):
        abs_log_file = output_dir.child(config.custom_log_file, type="f").path

    # return job and log files
    return {"job": job_file, "log": abs_log_file}

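# A hedged sketch (hypothetical task class) of the "task hook" invoked above:
# lsf_job_config() receives the prepared config shortly before the job file is
# built, may mutate any of the attributes set above, and must return the config.
import law
law.contrib.load("lsf")


class MyTask(law.lsf.LSFWorkflow):

    def lsf_job_config(self, config, job_num, branches):
        # e.g. disable the email notifications enabled by default above
        config.emails = False
        return config
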
def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    pf = lambda s: "postfix:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.htcondor_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args(exclude=[("--workers", 1), ("--local-scheduler", 1)])
    if task.htcondor_use_local_scheduler():
        task_params = add_cmdline_arg(task_params, "--local-scheduler", "True")
    for arg in task.htcondor_cmdline_args() or []:
        if isinstance(arg, tuple):
            task_params = add_cmdline_arg(task_params, *arg)
        else:
            task_params = add_cmdline_arg(task_params, arg)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file
    bootstrap_file = task.htcondor_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.htcondor_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # logging
    # we do not use condor's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.log = None
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)

    # we can use condor's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.htcondor_output_directory()
    if isinstance(output_dir, LocalDirectoryTarget):
        config.absolute_paths = True
        config.custom_content.append(("initialdir", output_dir.path))
    else:
        del config.output_files[:]

    # task hook
    config = task.htcondor_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files[1:]]
    config.render_variables["input_files"] = " ".join(input_basenames)

    return self.job_file_factory(**config.__dict__)

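# A hedged sketch (hypothetical task class and value) of the corresponding htcondor
# hook: custom_content entries are (key, value) pairs rendered into the submit file,
# mirroring the ("initialdir", ...) entry appended above.
import law
law.contrib.load("htcondor")


class MyTask(law.htcondor.HTCondorWorkflow):

    def htcondor_job_config(self, config, job_num, branches):
        # request_memory is a standard htcondor submit command; the value is illustrative
        config.custom_content.append(("request_memory", "2000"))
        return config
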
def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    pf = lambda s: "postfix:{}".format(s)

    # executable
    config.executable = "bash_wrapper.sh"

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # force the local scheduler?
    ls_flag = "--local-scheduler"
    if ls_flag not in task_params and task.glite_use_local_scheduler():
        task_params.append(ls_flag)

    # job script arguments
    job_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [law_src_path("job", "bash_wrapper.sh"), law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file
    bootstrap_file = task.glite_bootstrap_file()
    config.input_files.append(bootstrap_file)
    config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook
    config = task.glite_job_config(config, job_num, branches)

    return self.job_file_factory(**config.__dict__)

def cmd(self, proxy_cmd):
    # singularity exec command arguments
    # -e clears the environment
    args = ["-e"]

    # helper to build forwarded paths
    cfg = Config.instance()
    cfg_section = self.get_config_section()
    forward_dir = cfg.get_expanded(cfg_section, "forward_dir")
    python_dir = cfg.get_expanded(cfg_section, "python_dir")
    bin_dir = cfg.get_expanded(cfg_section, "bin_dir")
    stagein_dir_name = cfg.get_expanded(cfg_section, "stagein_dir_name")
    stageout_dir_name = cfg.get_expanded(cfg_section, "stageout_dir_name")

    def dst(*args):
        return os.path.join(forward_dir, *(str(arg) for arg in args))

    # helper for mounting a volume
    volume_srcs = []

    def mount(*vol):
        src = vol[0]

        # make sure the same source directory is not mounted twice
        if src in volume_srcs:
            return
        volume_srcs.append(src)

        # ensure that source directories exist
        if not os.path.isfile(src) and not os.path.exists(src):
            makedirs(src)

        # store the mount point
        args.extend(["-B", ":".join(vol)])

    # determine whether volume binding is allowed
    allow_binds_cb = getattr(self.task, "singularity_allow_binds", None)
    if callable(allow_binds_cb):
        allow_binds = allow_binds_cb()
    else:
        allow_binds = cfg.get_expanded(cfg_section, "allow_binds")

    # determine whether law software forwarding is allowed
    forward_law_cb = getattr(self.task, "singularity_forward_law", None)
    if callable(forward_law_cb):
        forward_law = forward_law_cb()
    else:
        forward_law = cfg.get_expanded(cfg_section, "forward_law")

    # environment variables to set
    env = self._get_env()

    # prevent python from writing byte code files
    env["PYTHONDONTWRITEBYTECODE"] = "1"

    if forward_law:
        # adjust path variables
        if allow_binds:
            env["PATH"] = os.pathsep.join([dst("bin"), "$PATH"])
            env["PYTHONPATH"] = os.pathsep.join([dst(python_dir), "$PYTHONPATH"])
        else:
            env["PATH"] = "$PATH"
            env["PYTHONPATH"] = "$PYTHONPATH"

        # forward python directories of law and dependencies
        for mod in law_deps:
            path = os.path.dirname(mod.__file__)
            name, ext = os.path.splitext(os.path.basename(mod.__file__))
            if name == "__init__":
                vsrc = path
                vdst = dst(python_dir, os.path.basename(path))
            else:
                vsrc = os.path.join(path, name + ".py")
                vdst = dst(python_dir, name + ".py")
            if allow_binds:
                mount(vsrc, vdst)
            else:
                dep_path = os.path.dirname(vsrc)
                if dep_path not in env["PYTHONPATH"].split(os.pathsep):
                    env["PYTHONPATH"] = os.pathsep.join([dep_path, env["PYTHONPATH"]])

        # forward the law cli dir to bin as it contains a law executable
        if allow_binds:
            env["PATH"] = os.pathsep.join([dst(python_dir, "law", "cli"), env["PATH"]])
        else:
            env["PATH"] = os.pathsep.join([law_src_path("cli"), env["PATH"]])

        # forward the law config file
        if cfg.config_file:
            if allow_binds:
                mount(cfg.config_file, dst("law.cfg"))
                env["LAW_CONFIG_FILE"] = dst("law.cfg")
            else:
                env["LAW_CONFIG_FILE"] = cfg.config_file

        # forward the luigi config file
        for p in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
            if os.path.exists(p):
                if allow_binds:
                    mount(p, dst("luigi.cfg"))
                    env["LUIGI_CONFIG_PATH"] = dst("luigi.cfg")
                else:
                    env["LUIGI_CONFIG_PATH"] = p
                break

    # add staging directories
    if (self.stagein_info or self.stageout_info) and not allow_binds:
        raise Exception("cannot use stage-in or -out if binds are not allowed")
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = dst(stagein_dir_name)
        mount(self.stagein_info.stage_dir.path, dst(stagein_dir_name))
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = dst(stageout_dir_name)
        mount(self.stageout_info.stage_dir.path, dst(stageout_dir_name))

    # forward volumes defined in the config and by the task
    vols = self._get_volumes()
    if vols and not allow_binds:
        raise Exception("cannot forward volumes to sandbox if binds are not allowed")
    for hdir, cdir in six.iteritems(vols):
        if not cdir:
            mount(hdir)
        else:
            cdir = self._expand_volume(cdir, bin_dir=dst(bin_dir), python_dir=dst(python_dir))
            mount(hdir, cdir)

    # handle local scheduling within the container
    if self.force_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)

    # get the singularity exec command, add arguments from above
    singularity_exec_cmd = self._singularity_exec_cmd() + args

    # build commands to set up environment
    setup_cmds = self._build_setup_cmds(env)

    # build the final command
    cmd = quote_cmd(singularity_exec_cmd + [
        self.image,
        "bash", "-l", "-c",
        "; ".join(flatten(setup_cmds, proxy_cmd.build())),
    ])

    return cmd

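# A hedged sketch (hypothetical task and image key) of the two task callbacks looked
# up via getattr() above, which take precedence over the "allow_binds" and
# "forward_law" config options when defined:
import law


class MyTask(law.SandboxTask):

    sandbox = "singularity::my_image"  # hypothetical image key

    def singularity_allow_binds(self):
        return True

    def singularity_forward_law(self):
        return False
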
def execute(args):
    """
    Executes the *completion* subprogram with parsed commandline *args*.
    """
    print(law_src_path("cli", "completion.sh"))

def cmd(self, proxy_cmd):
    cfg = Config.instance()

    # singularity exec command arguments
    args = ["-e"]

    # add args configured on the task
    args_getter = getattr(self.task, "singularity_args", None)
    args += make_list(args_getter() if callable(args_getter) else self.default_singularity_args)

    # environment variables to set
    env = self._get_env()

    # prevent python from writing byte code files
    env["PYTHONDONTWRITEBYTECODE"] = "1"

    # helper to build forwarded paths
    section = self.get_config_section()
    forward_dir = cfg.get_expanded(section, "forward_dir")
    python_dir = cfg.get_expanded(section, "python_dir")
    bin_dir = cfg.get_expanded(section, "bin_dir")
    stagein_dir = cfg.get_expanded(section, "stagein_dir")
    stageout_dir = cfg.get_expanded(section, "stageout_dir")

    def dst(*args):
        return os.path.join(forward_dir, *(str(arg) for arg in args))

    # helper for mounting a volume
    volume_srcs = []

    def mount(*vol):
        src = vol[0]

        # make sure the same source directory is not mounted twice
        if src in volume_srcs:
            return
        volume_srcs.append(src)

        # ensure that source directories exist
        if not os.path.isfile(src) and not os.path.exists(src):
            os.makedirs(src)

        # store the mount point
        args.extend(["-B", ":".join(vol)])

    if self.forward_env:
        # adjust path variables
        if self.allow_binds:
            env["PATH"] = os.pathsep.join(["$PATH", dst("bin")])
            env["PYTHONPATH"] = os.pathsep.join(["$PYTHONPATH", dst(python_dir)])
        else:
            env["PATH"] = "$PATH"
            env["PYTHONPATH"] = "$PYTHONPATH"

        # forward python directories of law and dependencies
        for mod in law_deps:
            path = os.path.dirname(mod.__file__)
            name, ext = os.path.splitext(os.path.basename(mod.__file__))
            if name == "__init__":
                vsrc = path
                vdst = dst(python_dir, os.path.basename(path))
            else:
                vsrc = os.path.join(path, name + ".py")
                vdst = dst(python_dir, name + ".py")
            if self.allow_binds:
                mount(vsrc, vdst)
            else:
                dep_path = os.path.dirname(vsrc)
                if dep_path not in env["PYTHONPATH"].split(os.pathsep):
                    env["PYTHONPATH"] = os.pathsep.join([env["PYTHONPATH"], dep_path])

        # forward the law cli dir to bin as it contains a law executable
        if self.allow_binds:
            env["PATH"] = os.pathsep.join([env["PATH"], dst(python_dir, "law", "cli")])
        else:
            cli_dir = os.path.join(law_src_path(), "cli")
            env["PATH"] = os.pathsep.join([env["PATH"], cli_dir])

        # forward the law config file
        if cfg.config_file:
            if self.allow_binds:
                mount(cfg.config_file, dst("law.cfg"))
                env["LAW_CONFIG_FILE"] = dst("law.cfg")
            else:
                env["LAW_CONFIG_FILE"] = cfg.config_file

        # forward the luigi config file
        for p in luigi.configuration.LuigiConfigParser._config_paths[::-1]:
            if os.path.exists(p):
                if self.allow_binds:
                    mount(p, dst("luigi.cfg"))
                    env["LUIGI_CONFIG_PATH"] = dst("luigi.cfg")
                else:
                    env["LUIGI_CONFIG_PATH"] = p
                break

    if (self.stagein_info or self.stageout_info) and not self.allow_binds:
        raise Exception("Cannot use stage-in or -out if binds are not allowed.")

    # add staging directories
    if self.stagein_info:
        env["LAW_SANDBOX_STAGEIN_DIR"] = dst(stagein_dir)
        mount(self.stagein_info.stage_dir.path, dst(stagein_dir))
    if self.stageout_info:
        env["LAW_SANDBOX_STAGEOUT_DIR"] = dst(stageout_dir)
        mount(self.stageout_info.stage_dir.path, dst(stageout_dir))

    # forward volumes defined in the config and by the task
    vols = self._get_volumes()
    if vols and not self.allow_binds:
        raise Exception("Cannot forward volumes to Sandbox if binds are not allowed.")
    for hdir, cdir in six.iteritems(vols):
        if not cdir:
            mount(hdir)
        else:
            cdir = cdir.replace("${PY}", dst(python_dir)).replace("${BIN}", dst(bin_dir))
            mount(hdir, cdir)

    # extend by arguments needed for both env loading and executing the job
    args.extend(self.common_args())

    # build commands to set up environment
    setup_cmds = self._build_setup_cmds(env)

    # handle scheduling within the container
    ls_flag = "--local-scheduler"
    if self.force_local_scheduler() and ls_flag not in proxy_cmd:
        proxy_cmd.append(ls_flag)

    # build the final command
    cmd = quote_cmd(["singularity", "exec"] + args + [
        self.image,
        "bash", "-l", "-c",
        "; ".join(flatten(setup_cmds, " ".join(proxy_cmd))),
    ])

    return cmd

def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2] -> "_0To3"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    _postfix = lambda path: self.job_file_factory.postfix_file(path, postfix)
    pf = lambda s: "postfix:{}".format(s)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # force the local scheduler?
    ls_flag = "--local-scheduler"
    if ls_flag not in task_params and task.lsf_use_local_scheduler():
        task_params.append(ls_flag)

    # job script arguments
    job_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=task_params,
        start_branch=branches[0],
        end_branch=branches[-1] + 1,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.attempts.get(job_num, 0)),
    )
    config.command = "bash {} {}".format(_postfix("job.sh"), job_args.join())

    # meta infos
    config.job_name = task.task_id
    config.emails = True

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [law_src_path("job", "job.sh")]

    # add the bootstrap file
    bootstrap_file = task.lsf_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.lsf_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # output files
    config.output_files = []

    # logging
    # we do not use lsf's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)

    # we can use lsf's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.lsf_output_directory()
    if not isinstance(output_dir, LocalDirectoryTarget):
        del config.output_files[:]
    else:
        config.absolute_paths = True
        config.cwd = output_dir.path

    # task hook
    config = task.lsf_job_config(config, job_num, branches)

    return self.job_file_factory(**config.__dict__)

def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    pf = lambda s: "__law_job_postfix__:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.glite_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    proxy_cmd = ProxyCommand(task.as_branch(branches[0]), exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"])
    if task.glite_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.glite_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file
    bootstrap_file = task.glite_bootstrap_file()
    config.input_files.append(bootstrap_file)
    config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.custom_log_file = log_file
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook
    config = task.glite_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # build the job file and get the sanitized config
    job_file, config = self.job_file_factory(**config.__dict__)

    # determine the custom log file uri if set
    abs_log_file = None
    if config.custom_log_file:
        abs_log_file = os.path.join(config.output_uri, config.custom_log_file)

    # return job and log files
    return {"job": job_file, "log": abs_log_file}

def create_job_file(self, job_num, branches):
    task = self.task

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)

    # create the config
    c = self.job_file_factory.Config()
    c.input_files = DeprecatedInputFiles()
    c.output_files = []
    c.render_variables = {}
    c.custom_content = []

    # get the actual wrapper file that will be executed by the remote job
    c.executable = get_path(task.htcondor_wrapper_file())
    c.input_files["executable_file"] = c.executable
    law_job_file = law_src_path("job", "law_job.sh")
    if c.executable != law_job_file:
        c.input_files["job_file"] = law_job_file

    # collect task parameters
    proxy_cmd = ProxyCommand(task.as_branch(branches[0]), exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"])
    if task.htcondor_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.htcondor_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    c.arguments = job_args.join()

    # add the bootstrap file
    bootstrap_file = task.htcondor_bootstrap_file()
    if bootstrap_file:
        c.input_files["bootstrap_file"] = bootstrap_file

    # add the stageout file
    stageout_file = task.htcondor_stageout_file()
    if stageout_file:
        c.input_files["stageout_file"] = stageout_file

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        c.input_files["dashboard_file"] = dashboard_file

    # logging
    # we do not use htcondor's logging mechanism since it might require that the submission
    # directory is present when it retrieves logs, and therefore we use a custom log file
    c.log = None
    c.stdout = None
    c.stderr = None
    if task.transfer_logs:
        c.custom_log_file = "stdall.txt"

    # when the output dir is local, we can run within this directory for easier output file
    # handling and use absolute paths for input files
    output_dir = task.htcondor_output_directory()
    output_dir_is_local = isinstance(output_dir, LocalDirectoryTarget)
    if output_dir_is_local:
        c.absolute_paths = True
        c.custom_content.append(("initialdir", output_dir.path))

    # task hook
    c = task.htcondor_job_config(c, job_num, branches)

    # when the output dir is not local, direct output files are not possible
    if not output_dir_is_local:
        del c.output_files[:]

    # build the job file and get the sanitized config
    job_file, c = self.job_file_factory(postfix=postfix, **c.__dict__)

    # get the location of the custom local log file if any
    abs_log_file = None
    if output_dir_is_local and c.custom_log_file:
        abs_log_file = os.path.join(output_dir.path, c.custom_log_file)

    # return job and log files
    return {"job": job_file, "log": abs_log_file}

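# A hedged sketch (hypothetical workflow and path) of the output-directory switch
# above: a LocalDirectoryTarget enables absolute paths and the "initialdir" entry,
# whereas any other target type causes direct output files to be dropped.
import law
law.contrib.load("htcondor")


class MyWorkflow(law.htcondor.HTCondorWorkflow):

    def htcondor_output_directory(self):
        return law.LocalDirectoryTarget("/data/law_jobs")  # hypothetical local path
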
def slurm_wrapper_file(self):
    return law_src_path("job", "law_job.sh")

def execute(args):
    """
    Executes the *location* subprogram with parsed commandline *args*.
    """
    print(law_src_path())

def create_job_file(self, job_num, branches):
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    pf = lambda s: "postfix:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.arc_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # add and remove some arguments
    task_params = remove_cmdline_arg(task_params, "--workers", 2)
    if task.arc_use_local_scheduler():
        task_params = add_cmdline_arg(task_params, "--local-scheduler")
    for arg in task.arc_cmdline_args() or []:
        if isinstance(arg, tuple):
            task_params = add_cmdline_arg(task_params, *arg)
        else:
            task_params = add_cmdline_arg(task_params, arg)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.job_name = task.task_id
    config.output_uri = task.arc_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file
    bootstrap_file = task.arc_bootstrap_file()
    config.input_files.append(bootstrap_file)
    config.render_variables["bootstrap_file"] = pf(os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.arc_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log files
    config.log = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook
    config = task.arc_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    return self.job_file_factory(**config.__dict__)

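# A hedged sketch (hypothetical task and argument names) of arc_cmdline_args() as
# consumed by the loop above: entries may be plain flags or (flag, value) tuples,
# and both forms are forwarded through add_cmdline_arg().
import law
law.contrib.load("arc")


class MyTask(law.arc.ARCWorkflow):

    def arc_cmdline_args(self):
        return ["--some-flag", ("--some-param", "value")]  # hypothetical arguments
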
def create_job_file(self, job_num, branches):
    task = self.task

    # the file postfix is a pythonic range built from the branch numbers, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)

    # create the config
    c = self.job_file_factory.Config()
    c.input_files = DeprecatedInputFiles()
    c.output_files = []
    c.render_variables = {}
    c.custom_content = []

    # get the actual wrapper file that will be executed by the remote job
    c.executable = get_path(task.glite_wrapper_file())
    c.input_files["executable_file"] = c.executable
    law_job_file = law_src_path("job", "law_job.sh")
    if c.executable != law_job_file:
        c.input_files["job_file"] = law_job_file

    # collect task parameters
    proxy_cmd = ProxyCommand(task.as_branch(branches[0]), exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"])
    if task.glite_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.glite_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    c.arguments = job_args.join()

    # add the bootstrap file
    bootstrap_file = task.glite_bootstrap_file()
    if bootstrap_file:
        c.input_files["bootstrap_file"] = bootstrap_file

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        c.input_files["stageout_file"] = stageout_file

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        c.input_files["dashboard_file"] = dashboard_file

    # log file
    c.stdout = None
    c.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        c.stdout = log_file
        c.stderr = log_file
        c.custom_log_file = log_file

    # meta infos
    c.output_uri = task.glite_output_uri()

    # task hook
    c = task.glite_job_config(c, job_num, branches)

    # build the job file and get the sanitized config
    job_file, c = self.job_file_factory(postfix=postfix, **c.__dict__)

    # determine the custom log file uri if set
    abs_log_file = None
    if c.custom_log_file:
        abs_log_file = os.path.join(c.output_uri, c.custom_log_file)

    # return job and log files
    return {"job": job_file, "log": abs_log_file}

def load(*packages):
    """
    Loads contrib *packages* and adds them to the law namespace. Effectively, this removes the
    necessity of having the ``contrib`` module in imports or when accessing members. Example:

    .. code-block:: python

        import law
        law.contrib.load("slack")

        print(law.slack.NotifySlackParameter)  # instead of law.contrib.slack.NotifySlackParameter
        # -> <class '...'>

    It is ensured that packages are loaded only once.
    """
    for pkg in flatten(packages):
        if pkg in loaded_packages:
            logger.debug("skip contrib package '{}', already loaded".format(pkg))
            continue
        elif not os.path.exists(law_src_path("contrib", pkg, "__init__.py")):
            raise Exception("contrib package '{}' does not exist".format(pkg))
        elif getattr(law, pkg, None):
            raise Exception("cannot load contrib package '{}', attribute with that name already "
                "exists on the law module".format(pkg))

        mod = __import__("law.contrib.{}".format(pkg), globals(), locals(), [pkg])
        setattr(law, pkg, mod)
        law.__all__.append(pkg)
        loaded_packages.append(pkg)
        logger.debug("loaded contrib package '{}'".format(pkg))

        # the contrib mechanism used to add all members of the module to the main law namespace,
        # but given the growing number of contrib packages, the chance of collisions is no longer
        # negligible, so for the moment add dummy objects only for callables to the law module
        # that, when used, raise verbose exceptions
        # (to be removed for v0.1)
        def dummy_factory(pkg, attr, member):
            def _raise():
                raise AttributeError("due to a change in 'law.contrib.load()', the attribute "
                    "'{0}' is no longer accessible on the global 'law' namespace, please use "
                    "'law.{1}.{0}' instead".format(attr, pkg))

            if isinstance(member, types.FunctionType):
                def dummy(*args, **kwargs):
                    """ Dummy function throwing an *AttributeError* when called. """
                    _raise()
            else:
                class dummy(member):
                    """ Dummy class throwing an *AttributeError* when instantiated. """
                    exclude_index = True
                    name = str(uuid.uuid4())

                    def __new__(cls, *args, **kwargs):
                        _raise()

            return dummy

        for attr in mod.__all__:
            member = getattr(mod, attr)
            if callable(member):
                setattr(law, attr, dummy_factory(pkg, attr, member))
            else:
                logger.debug("skip creating dummy object for attribute {} of package {}".format(
                    attr, pkg))

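# A hedged usage sketch of the dummy objects created above (assuming the "slack"
# contrib package exists): the namespaced attribute works, while the old flat
# attribute raises a verbose AttributeError when used.
import law

law.contrib.load("slack")

print(law.slack.NotifySlackParameter)  # -> <class '...'>
try:
    law.NotifySlackParameter()  # dummy class, raises on instantiation
except AttributeError as e:
    print(e)  # suggests using 'law.slack.NotifySlackParameter' instead
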