def create_job_file(self, job_num, branches):
    """Assemble the ARC job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``arc_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix, so it must not be
            empty.

    Returns:
        The return value of ``self.job_file_factory`` called with the
        attributes of the final config as keyword arguments.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix

    def pf(s):
        # prefix a file name with the "postfix:" marker used in render variables
        return "postfix:{}".format(s)

    def add_input_file(key, path):
        # register an optional input file and its render variable; a missing
        # (falsy) path is skipped so that os.path.basename never sees None
        if not path:
            return
        config.input_files.append(path)
        config.render_variables[key] = pf(os.path.basename(path))

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.arc_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()

    # add and remove some arguments
    task_params = remove_cmdline_arg(task_params, "--workers", 2)
    if task.arc_use_local_scheduler():
        task_params = add_cmdline_arg(task_params, "--local-scheduler")
    for arg in task.arc_cmdline_args() or []:
        # tuples are unpacked into multiple values, anything else is one value
        if isinstance(arg, tuple):
            task_params = add_cmdline_arg(task_params, *arg)
        else:
            task_params = add_cmdline_arg(task_params, arg)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.job_name = task.task_id
    config.output_uri = task.arc_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file, the stageout file and the dashboard hook file,
    # each only when the respective hook returns one
    add_input_file("bootstrap_file", task.arc_bootstrap_file())
    add_input_file("stageout_file", task.arc_stageout_file())
    add_input_file("dashboard_file", self.dashboard.remote_hook_file())

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log files
    config.log = None
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook, which may return a modified or entirely new config
    config = task.arc_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    # (done after the hook so that its changes to the input files are included)
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    return self.job_file_factory(**config.__dict__)
def create_job_file(self, job_num, branches):
    """Assemble the HTCondor job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``htcondor_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix.

    Returns:
        The return value of ``self.job_file_factory`` called with the
        attributes of the final config as keyword arguments.
    """
    task = self.task
    cfg = self.job_file_factory.Config()

    def tagged(name):
        # prefix a file name with the "postfix:" marker used in render variables
        return "postfix:{}".format(name)

    # postfix is the pythonic range of the branches, e.g. [0, 1, 2, 4] -> "_0To5"
    cfg.postfix = "_{}To{}".format(branches[0], branches[-1] + 1)

    # the wrapper file is what the remote job executes
    wrapper_path = get_path(task.htcondor_wrapper_file())
    cfg.executable = os.path.basename(wrapper_path)

    # task parameters, with global worker / scheduler arguments excluded
    cli_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    cli_params += global_cmdline_args(
        exclude=[("--workers", 1), ("--local-scheduler", 1)])
    if task.htcondor_use_local_scheduler():
        cli_params = add_cmdline_arg(cli_params, "--local-scheduler", "True")
    for extra in task.htcondor_cmdline_args() or []:
        # a tuple carries several values, anything else is a single value
        extra_values = extra if isinstance(extra, tuple) else (extra,)
        cli_params = add_cmdline_arg(cli_params, *extra_values)

    # arguments of the job script
    attempt = self.submission_data.attempts.get(job_num, 0)
    hook_data = self.dashboard.remote_hook_data(job_num, attempt)
    cfg.arguments = JobArguments(
        task_cls=task.__class__,
        task_params=cli_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=hook_data,
    ).join()

    # mandatory input files and their render variables
    cfg.render_variables = {}
    cfg.input_files = [wrapper_path, law_src_path("job", "job.sh")]
    cfg.render_variables["job_file"] = tagged("job.sh")

    # optional input files: bootstrap, stageout and dashboard hook file,
    # queried in this order and only added when the hook returns one
    optional_inputs = (
        ("bootstrap_file", task.htcondor_bootstrap_file),
        ("stageout_file", task.htcondor_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for var_name, query in optional_inputs:
        path = query()
        if not path:
            continue
        cfg.input_files.append(path)
        cfg.render_variables[var_name] = tagged(os.path.basename(path))

    cfg.output_files = []
    cfg.custom_content = []

    # condor's own logging is not used since it requires that the submission
    # directory is present when it retrieves logs; job.sh handles logs instead
    cfg.log = None
    cfg.stdout = None
    cfg.stderr = None
    if task.transfer_logs:
        cfg.output_files.append("stdall.txt")
        cfg.render_variables["log_file"] = tagged("stdall.txt")

    # condor's file stageout is only usable with a local output directory,
    # otherwise the stageout file has to take care of outputs
    out_dir = task.htcondor_output_directory()
    if not isinstance(out_dir, LocalDirectoryTarget):
        del cfg.output_files[:]
    else:
        cfg.absolute_paths = True
        cfg.custom_content.append(("initialdir", out_dir.path))

    # task hook, which may return a modified or entirely new config
    cfg = task.htcondor_job_config(cfg, job_num, branches)

    # render the basenames of all input files except the first one
    cfg.render_variables["input_files"] = " ".join(
        tagged(os.path.basename(path)) for path in cfg.input_files[1:])

    return self.job_file_factory(**cfg.__dict__)
def create_job_file(self, job_num, branches):
    """Assemble the LSF job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``lsf_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix.

    Returns:
        A dict with the job file under ``"job"`` and, under ``"log"``, the
        path of the custom log file inside a local output directory, or
        ``None`` when no such log file applies.
    """
    task = self.task
    cfg = self.job_file_factory.Config()

    def tagged(name):
        # prefix a file name with the "__law_job_postfix__:" marker used in
        # render variables
        return "__law_job_postfix__:{}".format(name)

    # postfix is the pythonic range of the branches, e.g. [0, 1, 2, 4] -> "_0To5"
    range_postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    cfg.postfix = range_postfix

    # build the command that is run remotely from the task parameters
    proxy = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.lsf_use_local_scheduler():
        proxy.add_arg("--local-scheduler", "True", overwrite=True)
    for name, val in OrderedDict(task.lsf_cmdline_args()).items():
        proxy.add_arg(name, val, overwrite=True)

    # arguments of the job script
    built_params = proxy.build(skip_run=True)
    attempt = self.submission_data.attempts.get(job_num, 0)
    hook_data = self.dashboard.remote_hook_data(job_num, attempt)
    script_args = JobArguments(
        task_cls=task.__class__,
        task_params=built_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=hook_data,
    )

    # the wrapper file is what the remote job executes; the command refers
    # to its postfixed basename
    wrapper_path = get_path(task.lsf_wrapper_file())
    postfixed_wrapper = self.job_file_factory.postfix_file(
        os.path.basename(wrapper_path), range_postfix)
    cfg.command = "bash {} {}".format(postfixed_wrapper, script_args.join())

    # meta infos
    cfg.job_name = task.task_id
    cfg.emails = True

    # mandatory input files
    cfg.render_variables = {}
    cfg.input_files = [wrapper_path, law_src_path("job", "job.sh")]

    # optional input files: bootstrap, stageout and dashboard hook file,
    # queried in this order and only added when the hook returns one
    optional_inputs = (
        ("bootstrap_file", task.lsf_bootstrap_file),
        ("stageout_file", task.lsf_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for var_name, query in optional_inputs:
        path = query()
        if not path:
            continue
        cfg.input_files.append(path)
        cfg.render_variables[var_name] = tagged(os.path.basename(path))

    cfg.output_files = []
    cfg.custom_content = []

    # lsf's own logging is not used since it requires that the submission
    # directory is present when it retrieves logs; job.sh handles logs instead
    cfg.stdout = None
    cfg.stderr = None
    if task.transfer_logs:
        cfg.custom_log_file = "stdall.txt"
        cfg.render_variables["log_file"] = tagged("stdall.txt")

    # lsf's file stageout is only usable with a local output directory,
    # otherwise the stageout file has to take care of outputs
    out_dir = task.lsf_output_directory()
    if not isinstance(out_dir, LocalDirectoryTarget):
        del cfg.output_files[:]
    else:
        cfg.absolute_paths = True
        cfg.cwd = out_dir.path

    # task hook, which may return a modified or entirely new config
    cfg = task.lsf_job_config(cfg, job_num, branches)

    # render the basenames of all input files
    cfg.render_variables["input_files"] = " ".join(
        tagged(os.path.basename(path)) for path in cfg.input_files)

    # build the job file and continue with the config the factory returns
    job_file, cfg = self.job_file_factory(**cfg.__dict__)

    # the custom log file only has an absolute location in a local output dir
    log_path = None
    if cfg.custom_log_file and isinstance(out_dir, LocalDirectoryTarget):
        log_path = out_dir.child(cfg.custom_log_file, type="f").path

    return {"job": job_file, "log": log_path}
def create_job_file(self, job_num, branches):
    """Assemble the gLite job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the attempt in ``self.attempts``, and
            passed to the task's ``glite_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix, so it must not be
            empty.

    Returns:
        The return value of ``self.job_file_factory`` called with the
        attributes of the final config as keyword arguments.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix

    def pf(s):
        # prefix a file name with the "postfix:" marker used in render variables
        return "postfix:{}".format(s)

    # executable
    config.executable = "bash_wrapper.sh"

    # collect task parameters
    task_params = task.as_branch(branches[0]).cli_args(exclude={"branch"})
    task_params += global_cmdline_args()

    # force the local scheduler? (the task is only asked when the flag is
    # not already part of the parameters)
    ls_flag = "--local-scheduler"
    if ls_flag not in task_params and task.glite_use_local_scheduler():
        task_params.append(ls_flag)

    # job script arguments
    job_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=task_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [
        law_src_path("job", "bash_wrapper.sh"),
        law_src_path("job", "job.sh"),
    ]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file, guarded like the other optional files so that a
    # missing bootstrap file does not end up in os.path.basename
    bootstrap_file = task.glite_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(
            os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # determine basenames of input files and add that list to the render data
    # NOTE(review): this runs before the task hook below, so input files
    # added by the hook are not reflected in the "input_files" variable
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.output_files.append(log_file)
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook, which may return a modified or entirely new config
    config = task.glite_job_config(config, job_num, branches)

    return self.job_file_factory(**config.__dict__)
def create_job_file(self, job_num, branches):
    """Assemble the gLite job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``glite_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix.

    Returns:
        A dict with the job file under ``"job"`` and, under ``"log"``, the
        custom log file name joined onto the output uri, or ``None`` when no
        custom log file is set on the config returned by the factory.
    """
    task = self.task

    # postfix is the pythonic range of the branches, e.g. [0, 1, 2, 4] -> "_0To5"
    range_postfix = "_{}To{}".format(branches[0], branches[-1] + 1)

    # fresh config with empty containers
    config = self.job_file_factory.Config()
    config.input_files = DeprecatedInputFiles()
    config.output_files = []
    config.render_variables = {}
    config.custom_content = []

    # the wrapper file is what the remote job executes; the law job file is
    # added separately only when the wrapper is a different file
    config.executable = get_path(task.glite_wrapper_file())
    config.input_files["executable_file"] = config.executable
    default_job_file = law_src_path("job", "law_job.sh")
    if config.executable != default_job_file:
        config.input_files["job_file"] = default_job_file

    # build the command that is run remotely from the task parameters
    proxy = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.glite_use_local_scheduler():
        proxy.add_arg("--local-scheduler", "True", overwrite=True)
    for name, val in OrderedDict(task.glite_cmdline_args()).items():
        proxy.add_arg(name, val, overwrite=True)

    # arguments of the job script
    built_params = proxy.build(skip_run=True)
    attempt = self.submission_data.attempts.get(job_num, 0)
    hook_data = self.dashboard.remote_hook_data(job_num, attempt)
    config.arguments = JobArguments(
        task_cls=task.__class__,
        task_params=built_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=hook_data,
    ).join()

    # optional input files: bootstrap, stageout and dashboard hook file,
    # queried in this order and only added when the hook returns one
    optional_inputs = (
        ("bootstrap_file", task.glite_bootstrap_file),
        ("stageout_file", task.glite_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for key, query in optional_inputs:
        path = query()
        if path:
            config.input_files[key] = path

    # stdout and stderr share one log file when logs are transferred
    log_name = "stdall.txt" if task.transfer_logs else None
    config.stdout = log_name
    config.stderr = log_name
    if log_name:
        config.custom_log_file = log_name

    # meta infos
    config.output_uri = task.glite_output_uri()

    # task hook, which may return a modified or entirely new config
    config = task.glite_job_config(config, job_num, branches)

    # build the job file and continue with the config the factory returns
    job_file, config = self.job_file_factory(
        postfix=range_postfix, **config.__dict__)

    # location of the custom log file below the output uri, if one is set
    if config.custom_log_file:
        log_location = os.path.join(config.output_uri, config.custom_log_file)
    else:
        log_location = None

    return {"job": job_file, "log": log_location}
def create_job_file(self, job_num, branches):
    """Assemble the gLite job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``glite_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix, so it must not be
            empty.

    Returns:
        A dict with the job file under ``"job"`` and, under ``"log"``, the
        custom log file name joined onto the output uri, or ``None`` when no
        custom log file is set on the config returned by the factory.
    """
    task = self.task
    config = self.job_file_factory.Config()

    # the file postfix is pythonic range made from branches, e.g. [0, 1, 2, 4] -> "_0To5"
    postfix = "_{}To{}".format(branches[0], branches[-1] + 1)
    config.postfix = postfix

    def pf(s):
        # prefix a file name with the "__law_job_postfix__:" marker used in
        # render variables
        return "__law_job_postfix__:{}".format(s)

    # get the actual wrapper file that will be executed by the remote job
    wrapper_file = get_path(task.glite_wrapper_file())
    config.executable = os.path.basename(wrapper_file)

    # collect task parameters
    proxy_cmd = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.glite_use_local_scheduler():
        proxy_cmd.add_arg("--local-scheduler", "True", overwrite=True)
    for key, value in OrderedDict(task.glite_cmdline_args()).items():
        proxy_cmd.add_arg(key, value, overwrite=True)

    # job script arguments
    job_args = JobArguments(
        task_cls=task.__class__,
        task_params=proxy_cmd.build(skip_run=True),
        branches=branches,
        auto_retry=False,
        dashboard_data=self.dashboard.remote_hook_data(
            job_num, self.submission_data.attempts.get(job_num, 0)),
    )
    config.arguments = job_args.join()

    # meta infos
    config.output_uri = task.glite_output_uri()

    # prepare render variables
    config.render_variables = {}

    # input files
    config.input_files = [wrapper_file, law_src_path("job", "job.sh")]
    config.render_variables["job_file"] = pf("job.sh")

    # add the bootstrap file, guarded like the other optional files so that a
    # missing bootstrap file does not end up in os.path.basename
    bootstrap_file = task.glite_bootstrap_file()
    if bootstrap_file:
        config.input_files.append(bootstrap_file)
        config.render_variables["bootstrap_file"] = pf(
            os.path.basename(bootstrap_file))

    # add the stageout file
    stageout_file = task.glite_stageout_file()
    if stageout_file:
        config.input_files.append(stageout_file)
        config.render_variables["stageout_file"] = pf(
            os.path.basename(stageout_file))

    # does the dashboard have a hook file?
    dashboard_file = self.dashboard.remote_hook_file()
    if dashboard_file:
        config.input_files.append(dashboard_file)
        config.render_variables["dashboard_file"] = pf(
            os.path.basename(dashboard_file))

    # output files
    config.output_files = []

    # custom content
    config.custom_content = []

    # log file
    if task.transfer_logs:
        log_file = "stdall.txt"
        config.stdout = log_file
        config.stderr = log_file
        config.custom_log_file = log_file
        config.render_variables["log_file"] = pf(log_file)
    else:
        config.stdout = None
        config.stderr = None

    # task hook, which may return a modified or entirely new config
    config = task.glite_job_config(config, job_num, branches)

    # determine basenames of input files and add that list to the render data
    # (done after the hook so that its changes to the input files are included)
    input_basenames = [pf(os.path.basename(path)) for path in config.input_files]
    config.render_variables["input_files"] = " ".join(input_basenames)

    # build the job file and get the sanitized config
    job_file, config = self.job_file_factory(**config.__dict__)

    # determine the custom log file uri if set
    abs_log_file = None
    if config.custom_log_file:
        abs_log_file = os.path.join(config.output_uri, config.custom_log_file)

    # return job and log files
    return {"job": job_file, "log": abs_log_file}
def create_job_file(self, job_num, branches):
    """Assemble the LSF job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the attempt in ``self.attempts``, and
            passed to the task's ``lsf_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            entry and the last entry plus one are used as start and end
            branch and define the file postfix.

    Returns:
        The return value of ``self.job_file_factory`` called with the
        attributes of the final config as keyword arguments.
    """
    task = self.task
    cfg = self.job_file_factory.Config()

    def tagged(name):
        # prefix a file name with the "postfix:" marker used in render variables
        return "postfix:{}".format(name)

    # postfix is the pythonic range of the branches, e.g. [0, 1, 2] -> "_0To3"
    first_branch = branches[0]
    stop_branch = branches[-1] + 1
    range_postfix = "_{}To{}".format(first_branch, stop_branch)
    cfg.postfix = range_postfix

    # task parameters plus global command line arguments
    cli_params = task.as_branch(first_branch).cli_args(exclude={"branch"})
    cli_params += global_cmdline_args()

    # add the local scheduler flag when it is missing and the task asks for
    # it; the task is only asked when the flag is not already present
    if "--local-scheduler" not in cli_params:
        if task.lsf_use_local_scheduler():
            cli_params.append("--local-scheduler")

    # arguments of the job script
    attempt = self.attempts.get(job_num, 0)
    hook_data = self.dashboard.remote_hook_data(job_num, attempt)
    script_args = JobArguments(
        task_module=task.__class__.__module__,
        task_family=task.task_family,
        task_params=cli_params,
        start_branch=first_branch,
        end_branch=stop_branch,
        auto_retry=False,
        dashboard_data=hook_data,
    )

    # the command runs the postfixed job script with the joined arguments
    postfixed_script = self.job_file_factory.postfix_file(
        "job.sh", range_postfix)
    cfg.command = "bash {} {}".format(postfixed_script, script_args.join())

    # meta infos
    cfg.job_name = task.task_id
    cfg.emails = True

    # mandatory input file
    cfg.render_variables = {}
    cfg.input_files = [law_src_path("job", "job.sh")]

    # optional input files: bootstrap, stageout and dashboard hook file,
    # queried in this order and only added when the hook returns one
    optional_inputs = (
        ("bootstrap_file", task.lsf_bootstrap_file),
        ("stageout_file", task.lsf_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for var_name, query in optional_inputs:
        path = query()
        if not path:
            continue
        cfg.input_files.append(path)
        cfg.render_variables[var_name] = tagged(os.path.basename(path))

    # render the basenames of all input files collected so far
    cfg.render_variables["input_files"] = " ".join(
        tagged(os.path.basename(path)) for path in cfg.input_files)

    cfg.output_files = []

    # lsf's own logging is not used since it requires that the submission
    # directory is present when it retrieves logs; job.sh handles logs instead
    cfg.stdout = None
    cfg.stderr = None
    if task.transfer_logs:
        cfg.output_files.append("stdall.txt")
        cfg.render_variables["log_file"] = tagged("stdall.txt")

    # lsf's file stageout is only usable with a local output directory,
    # otherwise the stageout file has to take care of outputs
    out_dir = task.lsf_output_directory()
    if isinstance(out_dir, LocalDirectoryTarget):
        cfg.absolute_paths = True
        cfg.cwd = out_dir.path
    else:
        del cfg.output_files[:]

    # task hook, which may return a modified or entirely new config
    cfg = task.lsf_job_config(cfg, job_num, branches)

    return self.job_file_factory(**cfg.__dict__)
def create_job_file(self, job_num, branches):
    """Assemble the HTCondor job file config for one job and build the job file.

    Args:
        job_num: Number of the job. It is forwarded to the dashboard hook
            data, used to look up the submission attempt, and passed to the
            task's ``htcondor_job_config`` hook.
        branches: Sequence of branch numbers covered by the job. The first
            and last entries define the file postfix.

    Returns:
        A dict with the job file under ``"job"`` and, under ``"log"``, the
        custom log file path inside a local output directory, or ``None``
        when the output directory is not local or no custom log file is set.
    """
    task = self.task

    # postfix is the pythonic range of the branches, e.g. [0, 1, 2, 4] -> "_0To5"
    range_postfix = "_{}To{}".format(branches[0], branches[-1] + 1)

    # fresh config with empty containers
    config = self.job_file_factory.Config()
    config.input_files = DeprecatedInputFiles()
    config.output_files = []
    config.render_variables = {}
    config.custom_content = []

    # the wrapper file is what the remote job executes; the law job file is
    # added separately only when the wrapper is a different file
    config.executable = get_path(task.htcondor_wrapper_file())
    config.input_files["executable_file"] = config.executable
    default_job_file = law_src_path("job", "law_job.sh")
    if config.executable != default_job_file:
        config.input_files["job_file"] = default_job_file

    # build the command that is run remotely from the task parameters
    proxy = ProxyCommand(
        task.as_branch(branches[0]),
        exclude_task_args={"branch"},
        exclude_global_args=["workers", "local-scheduler"],
    )
    if task.htcondor_use_local_scheduler():
        proxy.add_arg("--local-scheduler", "True", overwrite=True)
    for name, val in OrderedDict(task.htcondor_cmdline_args()).items():
        proxy.add_arg(name, val, overwrite=True)

    # arguments of the job script
    built_params = proxy.build(skip_run=True)
    attempt = self.submission_data.attempts.get(job_num, 0)
    hook_data = self.dashboard.remote_hook_data(job_num, attempt)
    config.arguments = JobArguments(
        task_cls=task.__class__,
        task_params=built_params,
        branches=branches,
        auto_retry=False,
        dashboard_data=hook_data,
    ).join()

    # optional input files: bootstrap, stageout and dashboard hook file,
    # queried in this order and only added when the hook returns one
    optional_inputs = (
        ("bootstrap_file", task.htcondor_bootstrap_file),
        ("stageout_file", task.htcondor_stageout_file),
        ("dashboard_file", self.dashboard.remote_hook_file),
    )
    for key, query in optional_inputs:
        path = query()
        if path:
            config.input_files[key] = path

    # htcondor's own logging is not used since it might require that the
    # submission directory is present when it retrieves logs; a custom log
    # file is used instead
    config.log = None
    config.stdout = None
    config.stderr = None
    if task.transfer_logs:
        config.custom_log_file = "stdall.txt"

    # with a local output dir, the job can run within that directory and
    # use absolute paths for input files
    out_dir = task.htcondor_output_directory()
    local_output = isinstance(out_dir, LocalDirectoryTarget)
    if local_output:
        config.absolute_paths = True
        config.custom_content.append(("initialdir", out_dir.path))

    # task hook, which may return a modified or entirely new config
    config = task.htcondor_job_config(config, job_num, branches)

    # without a local output dir, direct output files are not possible
    if not local_output:
        del config.output_files[:]

    # build the job file and continue with the config the factory returns
    job_file, config = self.job_file_factory(
        postfix=range_postfix, **config.__dict__)

    # location of the custom log file in the local output dir, if any
    if local_output and config.custom_log_file:
        log_location = os.path.join(out_dir.path, config.custom_log_file)
    else:
        log_location = None

    return {"job": job_file, "log": log_location}