def load_args(self, exclude_task_args=None, exclude_global_args=None):
    """
    Collect command line arguments as (key, value) tuples, combining the
    task's own cli args with the global cli args. Entries can be filtered
    per group via *exclude_task_args* and *exclude_global_args*.
    """
    # task-level args, filtered by the task-specific exclude set
    task_args = self.task.cli_args(exclude=exclude_task_args)
    # global args, filtered by the global exclude set
    global_args = global_cmdline_args(exclude=exclude_global_args)
    # flatten both groups into a single list of key-value tuples
    return list(task_args.items()) + list(global_args.items())
def proxy_cmd(self):
    """
    Build the command used to run this task via ``law run`` from within the
    sandbox, consisting of the qualified task name, the task's cli args
    (minus sandbox-excluded parameters), and the global cli args.
    """
    task = self.task
    # base command: "law run <module.task>"
    cmd = ["law", "run", "{}.{}".format(task.__module__, task.__class__.__name__)]
    # task cli args, excluding parameters not meant for the sandbox
    cmd += task.cli_args(exclude=task.exclude_params_sandbox)
    # global args
    cmd += global_cmdline_args()
    return cmd
def create_job_file(self, job_num, branches):
    """
    Create the ARC submission file for job *job_num* covering the given
    *branches* and return the object produced by the job file factory.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    # helper that marks a render value as postfix-sensitive for the factory
    pf = lambda s: "postfix:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.arc_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # add and remove some arguments
    # NOTE(review): the trailing 2 presumably removes the flag plus its value
    # (2 tokens) — confirm against remove_cmdline_arg's signature
    task_params = remove_cmdline_arg(task_params, "--workers", 2)
    if task.arc_use_local_scheduler():
        task_params = add_cmdline_arg(task_params, "--local-scheduler")
    # extra, backend-specific cli args; tuples carry a value, plain strings are flags
    for arg in task.arc_cmdline_args() or []:
        if isinstance(arg, tuple):
            task_params = add_cmdline_arg(task_params, *arg)
        else:
            task_params = add_cmdline_arg(task_params, arg)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.job_name = task.task_id
    config.output_uri = task.arc_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file (always present for ARC — appended unconditionally)
    bootstrap_file = task.arc_bootstrap_file()
    config.input_files.append(bootstrap_file)
    config.render_variables["bootstrap_file"] = pf(
        os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.arc_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log files: merge stdout and stderr into one file when logs are transferred
    config.log = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook: lets the task mutate the config before the file is built
    config = task.arc_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    input_basenames = [
        pf(os.path.basename(path)) for path in config.input_files
    ]
    config.render_variables["input_files"] = " ".join(input_basenames)

    return self.job_file_factory(**config.__dict__)
def create_job_file(self, job_num, branches):
    """
    Create the HTCondor submission file for job *job_num* covering the given
    *branches* and return the object produced by the job file factory.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    # helper that marks a render value as postfix-sensitive for the factory
    pf = lambda s: "postfix:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.htcondor_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    # NOTE(review): the (flag, 1) tuples presumably mean "exclude the flag plus
    # 1 following value token" — confirm against global_cmdline_args
    task_params += global_cmdline_args(
        exclude=[("--workers", 1), ("--local-scheduler", 1)])
    if task.htcondor_use_local_scheduler():
        task_params = add_cmdline_arg(task_params, "--local-scheduler", "True")
    # extra, backend-specific cli args; tuples carry a value, plain strings are flags
    for arg in task.htcondor_cmdline_args() or []:
        if isinstance(arg, tuple):
            task_params = add_cmdline_arg(task_params, *arg)
        else:
            task_params = add_cmdline_arg(task_params, arg)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file
    bootstrap_file = task.htcondor_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(
            os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.htcondor_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # logging
    # we do not use condor's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.log = None
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)

    # we can use condor's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.htcondor_output_directory()
    if isinstance(output_dir, LocalDirectoryTarget):
        config.absolute_paths = True
        config.custom_content.append(("initialdir", output_dir.path))
    else:
        del config.output_files[:]

    # task hook: lets the task mutate the config before the file is built
    config = task.htcondor_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    # NOTE(review): the [1:] slice skips input_files[0] (the wrapper),
    # presumably because it is already referenced via config.executable —
    # confirm against the job file factory / job.sh contract
    input_basenames = [
        pf(os.path.basename(path)) for path in config.input_files[1:]
    ]
    config.render_variables["input_files"] = " ".join(input_basenames)

    return self.job_file_factory(**config.__dict__)
def create_job_file(self, job_num, branches):
    """
    Create the glite submission file for job *job_num* covering the given
    *branches* and return the object produced by the job file factory.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    # helper that marks a render value as postfix-sensitive for the factory
    pf = lambda s: "postfix:{}".format(s)

    # executable (shipped below as the first input file)
    config.executable = "bash_wrapper.sh"

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # force the local scheduler?
    ls_flag = "--local-scheduler"
    if ls_flag not in task_params and task.glite_use_local_scheduler():
        task_params.append(ls_flag)

    # job script arguments
    job_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [
        law_src_path("job", "bash_wrapper.sh"),
        law_src_path("job", "job.sh")
    ]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file (always present for glite — appended unconditionally)
    bootstrap_file = task.glite_bootstrap_file()
    config.input_files.append(bootstrap_file)
    config.render_variables["bootstrap_file"] = pf(
        os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # determine basenames of input files and add that list to the render data
    # NOTE(review): unlike other backends, this runs BEFORE the task hook, so
    # input files added by glite_job_config are not rendered — confirm intended
    input_basenames = [
        pf(os.path.basename(path)) for path in config.input_files
    ]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file: merge stdout and stderr into one file when logs are transferred
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook: lets the task mutate the config before the file is built
    config = task.glite_job_config(config, job_num, branches)

    return self.job_file_factory(**config.__dict__)
def create_job_file(self, job_num, branches):
    """
    Create the LSF submission file for job *job_num* covering the given
    *branches* and return the object produced by the job file factory.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2] -> "_0To3"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix
    # helper that applies the factory's file postfixing to a path
    _postfix = lambda path: self.job_file_factory.postfix_file(
        path, postfix)
    # helper that marks a render value as postfix-sensitive for the factory
    pf = lambda s: "postfix:{}".format(s)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()
    # force the local scheduler?
    ls_flag = "--local-scheduler"
    if ls_flag not in task_params and task.lsf_use_local_scheduler():
        task_params.append(ls_flag)

    # job script arguments
    job_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=task_params,
        start_branch=branches[0],
        end_branch=branches[-1] + 1,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.attempts.get(job_num, 0)),
    )
    # LSF runs a plain shell command rather than a declared executable
    config.command = "bash {} {}".format(_postfix("job.sh"),
                                         job_args.join())

    # meta infos
    config.job_name = task.task_id
    config.emails = True

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [law_src_path("job", "job.sh")]

    # add the bootstrap file
    bootstrap_file = task.lsf_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(
            os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.lsf_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # determine basenames of input files and add that list to the render data
    # NOTE(review): runs BEFORE the task hook, so input files added by
    # lsf_job_config are not rendered — confirm intended
    input_basenames = [
        pf(os.path.basename(path)) for path in config.input_files
    ]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # output files
    config.output_files = []

    # logging
    # we do not use lsf's logging mechanism since it requires that the submission directory
    # is present when it retrieves logs, and therefore we rely on the job.sh script
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)

    # we can use lsf's file stageout only when the output directory is local
    # otherwise, one should use the stageout_file and stageout manually
    output_dir = task.lsf_output_directory()
    if not isinstance(output_dir, LocalDirectoryTarget):
        del config.output_files[:]
    else:
        config.absolute_paths = True
        config.cwd = output_dir.path

    # task hook: lets the task mutate the config before the file is built
    config = task.lsf_job_config(config, job_num, branches)

    return self.job_file_factory(**config.__dict__)