def test_job_arg_name_files(tmpdir, fancyname):
    # Test to check that when a named argument is added to a Job, and the Job
    # is built with fancyname=True, the Job submit file and the
    # error/log/output files for the argument start with the same index.
    # E.g. job_(date)_01.submit, job_(date)_01.error, etc.
    # Regression test for issue #47
    submit_dir = str(tmpdir.mkdir('submit'))
    job = Job('testjob', example_script, submit=submit_dir)
    job.add_arg('arg', name='argname')
    dagman = Dagman('exampledagman', submit=submit_dir)
    dagman.add_job(job)
    dagman.build(fancyname=fancyname)

    with open(dagman.submit_file, 'r') as dagman_submit_file:
        dagman_submit_lines = dagman_submit_file.readlines()

    # Get root of the dagman submit file (submit file basename w/o .submit)
    submit_file_line = dagman_submit_lines[0]
    submit_file_basename = submit_file_line.split(os.sep)[-1].rstrip()
    submit_file_root = os.path.splitext(submit_file_basename)[0]
    # Get job_name variable (used to build error/log/output file basenames)
    jobname_line = dagman_submit_lines[2]
    jobname = jobname_line.split('"')[-2]
    other_file_root = '_'.join(jobname.split('_')[:-1])

    assert submit_file_root == other_file_root
def test_init_retry():
    # Test that the Job-level retry applies to add_arg calls that don't
    # specify a retry, and is overridden when add_arg specifies its own
    job = Job(name='jobname', executable=example_script, retry=7)
    job.add_arg('arg1')
    job.add_arg('arg2', retry=3)

    assert len(job.args) == 2
    assert job.args[0].retry == 7
    assert job.args[1].retry == 3
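# The retry values above end up as RETRY lines in the generated .dag file
# when the Job is built inside a Dagman. A minimal sketch (hypothetical
# paths; node names follow the <submit_name>_arg_<idx> convention tested
# in test_iter_job_args below):
from pycondor import Dagman, Job

submit_dir = '/tmp/submit'           # hypothetical directory
dagman = Dagman('retrydag', submit=submit_dir)
job = Job('jobname', '/tmp/job.sh',  # hypothetical executable
          submit=submit_dir, retry=7, dag=dagman)
job.add_arg('arg1')                  # inherits the Job-level retry=7
job.add_arg('arg2', retry=3)         # per-argument override
dagman.build(fancyname=False)

# Expect one RETRY line per argument, e.g. "RETRY jobname_arg_0 7"
# and "RETRY jobname_arg_1 3"
with open(dagman.submit_file) as dag_file:
    print(dag_file.read())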
def test_add_args():
    # Test that add_args is equivalent to multiple add_arg
    job_1 = Job('job1', example_script)
    for i in range(10):
        job_1.add_arg('file_{}.hdf'.format(i))

    job_2 = Job('job2', example_script)
    job_2.add_args(['file_{}.hdf'.format(i) for i in range(10)])

    assert job_1.args == job_2.args
def test_iter_job_args(tmpdir):
    # Check node names yielded by _iter_job_args
    submit_dir = str(tmpdir.mkdir('submit'))
    job = Job('testjob', example_script, submit=submit_dir)
    job.add_arg('argument1', name='arg1')
    job.add_arg('argument2')
    job.build()

    for idx, (node_name, jobarg) in enumerate(_iter_job_args(job)):
        if jobarg.name is not None:
            assert node_name == '{}_{}'.format(job.submit_name, jobarg.name)
        else:
            assert node_name == '{}_arg_{}'.format(job.submit_name, idx)
def test_dagman_has_bad_node_names(tmpdir):
    submit_dir = str(tmpdir.mkdir('submit'))
    # Test several combinations of good and bad job/argument names
    jobs_names = ['testjob', 'testjob.', 'testjob', 'testjob+']
    arg_names = ['argname', 'argname', 'argname+', 'argname.']
    has_bad_node_names = [False, True, True, True]
    for job_name, arg_name, bad_node_names in zip(jobs_names,
                                                  arg_names,
                                                  has_bad_node_names):
        job = Job(job_name, example_script, submit=submit_dir)
        job.add_arg('arg', name=arg_name)
        dagman = Dagman('testdagman', submit=submit_dir)
        dagman.add_job(job)
        dagman.build()
        assert dagman._has_bad_node_names == bad_node_names
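# The combinations above encode the constraint that node names containing
# '.' or '+' are flagged as bad. A small sketch of a sanitizer one might
# apply before naming jobs or arguments (a hypothetical helper, not part
# of pycondor):
def sanitize_node_name(name):
    # Replace the characters flagged as bad in the test above
    return name.replace('.', '_').replace('+', '_')

assert sanitize_node_name('testjob+') == 'testjob_'
assert sanitize_node_name('argname.') == 'argname_'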
def ppplots(self):
    """
    Set up job to create PP plots.
    """
    from pycondor import Job

    # get executable
    jobexec = shutil.which("cwinpy_pe_generate_pp_plots")

    extra_lines = []
    if self.accountgroup is not None:
        extra_lines.append("accounting_group = {}".format(self.accountgroup))
    if self.accountuser is not None:
        extra_lines.append(
            "accounting_group_user = {}".format(self.accountuser))

    # create cwinpy_pe Job
    job = Job(
        "cwinpy_pe_pp_plots",
        jobexec,
        error=self.runner.dag.inputs.pe_log_directory,
        log=self.runner.dag.inputs.pe_log_directory,
        output=self.runner.dag.inputs.pe_log_directory,
        submit=self.runner.dag.inputs.submit_directory,
        universe="vanilla",
        request_memory=self.runner.dag.inputs.request_memory,
        getenv=self.getenv,
        queue=1,
        requirements=self.runner.dag.inputs.requirements,
        retry=self.runner.dag.inputs.retry,
        extra_lines=extra_lines,
        dag=self.runner.dag.pycondor_dag,
    )

    jobargs = "--path '{}' ".format(
        os.path.join(self.basedir, "results", "*", "*"))
    jobargs += "--output {} ".format(
        os.path.join(self.basedir, "ppplot.png"))
    if self.outputsnr:
        jobargs += "--snrs "
    job.add_arg(jobargs)

    # all earlier nodes are parents; exclude the cwinpy_pe_pp_plots
    # job itself (the last node added to the DAG)
    job.add_parents(self.runner.dag.pycondor_dag.nodes[:-1])

    self.runner.dag.build()
def test_dagman_job_order(tmpdir):
    # Test to check that the order in which Jobs are added to a Dagman doesn't
    # change the Dagman submit file that is built. See issue #57.
    submit_dir = str(tmpdir.mkdir('submit'))
    dag_submit_lines = []
    for order_idx in range(2):
        dagman = Dagman('testdagman', submit=submit_dir)
        job_child = Job('childjob', example_script, submit=submit_dir)
        job_child.add_arg('--length 200', name='200jobname')
        job_child.add_arg('--length 400', retry=3)
        job_parent = Job('parentjob', example_script, submit=submit_dir)
        job_parent.add_arg('--length 100')
        job_parent.add_child(job_child)
        if order_idx == 0:
            # Add job_parent to dagman first
            dagman.add_job(job_parent)
            dagman.add_job(job_child)
        else:
            # Add job_child to dagman first
            dagman.add_job(job_child)
            dagman.add_job(job_parent)
        dagman.build(fancyname=False)
        # Append submit file lines to dag_submit_lines
        with open(dagman.submit_file, 'r') as dag_submit_file:
            dag_submit_lines.append(dag_submit_file.readlines())

    # Test that the same lines occur in the Dagman submit file for
    # adding the parent/child jobs in either order
    assert Counter(dag_submit_lines[0]) == Counter(dag_submit_lines[1])
replaced_name = logfile_name.replace("OUTPUT", o[:o.rindex(".")])
replaced_name = replaced_name.replace("ENERGY", energy)
replaced_name = replaced_name.replace("ITERATION",
                                      str(i).zfill(zfill_amount))
args.args[logfile_index] = replaced_name
transfer_files.append(replaced_name)
file_remaps.append(replaced_name + '=' +
                   os.path.join(logfile_dirname, replaced_name))

job = Job(descriptive_name + "_" + energy + "_" + str(i).zfill(zfill_amount),
          executable=script_file,
          output=output,
          error=error,
          log=log,
          submit=submit,
          # request_memory="5GB",
          extra_lines=[
              "should_transfer_files = YES",
              "transfer_output_files = " + ", ".join(transfer_files),
              'transfer_output_remaps = "' + '; '.join(file_remaps) + '"',
              "when_to_transfer_output = ON_EXIT"
          ],
          verbose=2 if args.verbose else 0)
job.add_arg(" ".join([energy] + args.args))
dag.add_job(job)

# Write all necessary submit files and submit dagman to Condor
if args.maxjobs > 0:
    dag.build_submit(submit_options="-maxjobs " + str(args.maxjobs))
else:
    dag.build_submit()
options = sys.argv[1:]
descriptive_name = "full_sim_" + sys.argv[1] + "_" + sys.argv[2]
if "-n" in sys.argv:
    descriptive_name += "_n" + sys.argv[sys.argv.index("-n") + 1]
else:
    descriptive_name += "_n10"

# Declare the error, output, log, and submit directories for Condor Job
error = '/data/user/fasig/pycondor'
output = '/data/user/fasig/pycondor'
log = '/data/user/fasig/pycondor'
submit = '/data/user/fasig/pycondor'

# Setting up a PyCondor Job
job = Job(descriptive_name, script_file,
          error=error, output=output,
          log=log, submit=submit, verbose=2)

# Adding arguments to job
if len(options) > 0:
    job.add_arg(" ".join(options))

# Write all necessary submit files and submit job to Condor
job.build_submit()
def generate_dag(times, flags=[], tag='gwdetchar-omega-batch', submit=False,
                 outdir=os.getcwd(), universe='vanilla',
                 condor_commands=get_condor_arguments()):
    """Construct a Directed Acyclic Graph (DAG) for a batch of omega scans

    Parameters
    ----------
    times : `list` of `float`
        list of GPS times to scan

    flags : `list` of `str`, optional
        a list of command-line flags to run for each job, defaults to an
        empty list

    tag : `str`, optional
        a helpful string to use to name the DAG,
        default: `'gwdetchar-omega-batch'`

    submit : `bool`, optional
        submit the DAG to condor, default: `False`

    outdir : `str`, optional
        the output directory in which to store files, will result in
        sub-directories called `'condor'` and `'logs'`,
        default: `os.getcwd()`

    universe : `str`, optional
        condor universe to run in, default: `'vanilla'`

    condor_commands : `list` of `str`, optional
        list of condor settings to process with, defaults to the output of
        `get_condor_arguments`

    Returns
    -------
    dagman : `~pycondor.Dagman`
        the fully built DAG object
    """
    logdir = os.path.join(outdir, 'logs')
    subdir = os.path.join(outdir, 'condor')
    executable = find_executable('gwdetchar-omega')

    # create DAG and jobs
    dagman = Dagman(name=tag, submit=subdir)
    job = Job(dag=dagman, name=os.path.basename(executable),
              executable=executable, universe=universe, submit=subdir,
              error=logdir, output=logdir, getenv=True,
              request_memory=4096 if universe != "local" else None,
              extra_lines=condor_commands)

    # make a node in the workflow for each event time
    for t in times:
        cmd = " ".join(
            [str(t)] +
            ["--output-directory", os.path.join(outdir, str(t))] +
            flags)
        job.add_arg(cmd, name=str(t).replace(".", "_"))

    # write and submit the DAG
    dagman.build(fancyname=False)
    print("Workflow generated for {} times".format(len(times)))
    if submit:
        dagman.submit_dag(submit_options="-force")
        print("Submitted to condor, check status via:\n\n"
              "$ condor_q {}".format(getuser()))
    else:
        print("Submit to condor via:\n\n"
              "$ condor_submit_dag {0.submit_file}".format(dagman))
    return dagman
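# For reference, a minimal usage sketch of generate_dag (hypothetical
# times, flags, and output directory; requires gwdetchar on the PATH so
# that find_executable('gwdetchar-omega') resolves):
dagman = generate_dag(
    times=[1126259462.4, 1187008882.4],
    flags=['--ifo', 'L1'],
    outdir='/home/user/omega-scans',
    submit=False,
)
print(dagman.submit_file)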
"to the submitted script") args = parser.parse_args() # Get script basename start_index = args.script.rfind("/")+1 try: extension_index = args.script.index(".") except ValueError: extension_index = len(args.script) script_name = args.script[start_index:extension_index] # Setting up a PyCondor Job job = Job(name=script_name, executable=args.script, error=args.error, output=args.output, log=args.log, submit=args.submit, verbose=2, request_memory=args.request_memory, request_disk=args.request_disk, request_cpus=args.request_cpus, getenv=args.getenv, universe=args.universe, initialdir=args.initialdir, extra_lines=["should_transfer_files = YES", "when_to_transfer_output = ON_EXIT"]) # Adding arguments to job if len(args.script_args)>0: job.add_arg(" ".join(args.script_args)) # Write all necessary submit files and submit job to Condor job.build_submit()
transfer_files = []
file_remaps = []
if output_index != -1:
    replaced_name = output_name.replace("ITERATION", str(i + 1).zfill(4))
    args.args[output_index] = replaced_name
    transfer_files.append(replaced_name)
    file_remaps.append(replaced_name + '=' +
                       os.path.join(output_dirname, replaced_name))

job = Job(descriptive_name + "_" + str(i + 1).zfill(4),
          executable=script_file,
          output=output,
          error=error,
          log=log,
          submit=submit,
          # request_memory="5GB",
          extra_lines=[
              "should_transfer_files = YES",
              "transfer_output_files = " + ", ".join(transfer_files),
              'transfer_output_remaps = "' + '; '.join(file_remaps) + '"',
              "when_to_transfer_output = ON_EXIT"
          ],
          verbose=2 if args.verbose else 0)
job.add_arg(" ".join([infile] + args.args))
dag.add_job(job)

# Write all necessary submit files and submit dagman to Condor
if args.maxjobs > 0:
    dag.build_submit(submit_options="-maxjobs " + str(args.maxjobs))
else:
    dag.build_submit()
"transfer_output_files = " + ", ".join(transfer_files), 'transfer_output_remaps = "' + '; '.join(file_remaps) + '"', "when_to_transfer_output = ON_EXIT" ] else: args.args[output_index] = os.path.join(output_dirname, replaced_name) transfer_lines = [ "should_transfer_files = YES", "when_to_transfer_output = ON_EXIT" ] job = Job((descriptive_name + "_" + str(i).zfill(zfill_amount) + "_" + str(args.iterations).zfill(zfill_amount)), executable=script_file, output=output, error=error, log=log, submit=submit, request_memory=args.memory, request_disk=args.disk, extra_lines=transfer_lines, verbose=2 if args.verbose else 0) job.add_arg(" ".join([args.script] + args.args)) dag.add_job(job) # Write all necessary submit files and submit dagman to Condor if args.maxjobs > 0: dag.build_submit(submit_options="-maxjobs " + str(args.maxjobs)) else: dag.build_submit()
def main():
    parser = argparse.ArgumentParser(
        description="Compute pair-wise overlap of a batch of skymaps")
    parser.add_argument(
        "--skymap",
        metavar="PATH",
        action="append",
        help="A list of paths pointing to the probability skymaps")
    parser.add_argument("--accounting-tag",
                        type=str,
                        default="ligo.dev.o3.cbc.lensing.multi",
                        help="Accounting tag")
    parser.add_argument("--slurm",
                        action="store_true",
                        help="Run on a condor+slurm cluster")
    parser.add_argument("--plot",
                        action="store_true",
                        help="Visualize the skymaps")
    parser.add_argument("--verbose",
                        action="store_true",
                        help="Be very verbose")
    args = parser.parse_args()

    compute_overlap_job_name = "compute_overlap"
    pairwise_overlap_out_str = "{prefix_1}_{prefix_2}_overlap.dat"

    # Directories for HTCondor
    try:
        os.makedirs(compute_overlap_job_name)
    except OSError:
        pass
    error = os.path.abspath("logs")
    output = os.path.abspath("logs")
    log = os.path.abspath("logs")
    submit = os.path.abspath("")

    # Create a DAG (although each node is independent of the others)
    dag = Dagman(
        name="dag_compute_overlap_from_skymaps",
        submit=submit,
    )

    universe = "vanilla"
    extra_lines = ["accounting_group = {}".format(args.accounting_tag)]
    if args.slurm:
        universe = "grid"
        extra_lines.append("grid_resource = batch slurm")

    # Compute overlap
    if len(args.skymap) >= 2:
        # At least two skymaps, so we can compute the pairwise overlap
        compute_overlap_job = Job(
            name="job_" + compute_overlap_job_name,
            executable=shutil.which("compute_overlap"),
            universe=universe,
            error=error,
            output=output,
            log=log,
            dag=dag,
            extra_lines=extra_lines,
        )

        for skymap_1, skymap_2 in itertools.combinations(args.skymap, 2):
            prefix_1 = get_filename_prefix(skymap_1)
            prefix_2 = get_filename_prefix(skymap_2)
            argument_str = ""
            if args.verbose:
                argument_str += " --verbose"
            if args.plot:
                argument_str += " --plot"
            argument_str += (
                " --skymap " + os.path.abspath(skymap_1) +
                " --skymap " + os.path.abspath(skymap_2) +
                " --output " + os.path.abspath(os.path.join(
                    compute_overlap_job_name,
                    pairwise_overlap_out_str.format(prefix_1=prefix_1,
                                                    prefix_2=prefix_2))))
            compute_overlap_job.add_arg(argument_str, retry=3)

    dag.build(fancyname=False)
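# The get_filename_prefix helper is not shown in this fragment; a plausible
# implementation, offered only as an assumption about its behavior:
import os

def get_filename_prefix(path):
    # Strip directories and the final extension, e.g.
    # '/data/skymap_GW170817.fits' -> 'skymap_GW170817'
    return os.path.splitext(os.path.basename(path))[0]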
file_bases = []
for filename in sorted(os.listdir(dirname)):
    filename = filename[:filename.rindex("_")]
    if filename.startswith(basename) and filename not in file_bases:
        file_bases.append(filename)

for base in file_bases:
    arguments = os.path.join(os.path.dirname(args.noise_file_basename), base)
    arguments += " " + str(args.outfile)
    arguments += " --range " + str(args.range[0]) + " " + str(args.range[1])
    arguments += " --stations " + str(args.stations)
    arguments += " --geometry " + str(args.geometry)
    arguments += " --threshold " + str(args.threshold)
    arguments += " --tot " + str(args.tot)
    arguments += " --antennas_hit " + str(args.antennas_hit)
    calculator_job.add_arg(arguments)

culminator_job.add_arg(args.outfile)

# Create job dependencies:
# culminator_job doesn't start until calculator_job has finished
calculator_job.add_child(culminator_job)

# Set up a dagman
dagman = Dagman("full_calculation_" + basename, submit=submit, verbose=2)

# Add jobs to dagman
dagman.add_job(calculator_job)
dagman.add_job(culminator_job)

# Write all necessary submit files and submit job to Condor
dagman.build_submit()
                       verbose=2)

output_suffixes = [
    str(i).zfill(file_zero_padding) for i in range(max_file_index + 1)
]

# Add arguments to jobs
for i in range(args.jobs):
    filename = args.output + "_" + str(i).zfill(job_zero_padding)
    arguments = filename
    arguments += " --number " + str(args.number)
    arguments += " --size " + str(args.size)
    arguments += " --time " + str(args.time)
    arguments += " --dt " + str(args.dt)
    arguments += " --rms " + str(args.rms)
    generator_job.add_arg(arguments)

    if add_file_indices:
        files = [
            filename + "_" + suffix + ".npz" for suffix in output_suffixes
        ]
    else:
        files = [filename + ".npz"]
    arguments = " ".join(files)
    arguments += " --output " + str(args.envelope)
    arguments += " --amplification " + str(args.amplification)
    processor_job.add_arg(arguments)

# Create job dependencies:
# processor_job doesn't start until generator_job has finished
generator_job.add_child(processor_job)
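# The generator/processor pattern used in the fragments above reduces to
# this self-contained sketch (hypothetical names and paths):
from pycondor import Dagman, Job

submit = '/tmp/submit'  # hypothetical directory
dagman = Dagman('dependency_example', submit=submit)
generator = Job('generator', '/tmp/generate.sh', submit=submit, dag=dagman)
processor = Job('processor', '/tmp/process.sh', submit=submit, dag=dagman)
generator.add_arg('run_0')
processor.add_arg('run_0 --output /tmp/envelope.npz')
# processor does not start until generator has finished
generator.add_child(processor)
dagman.build_submit()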
job_name = descriptive_name + "_" + str(r).zfill(zfill_amount)
if args.split > 1:
    job_name += "_" + str(sub).zfill(len(str(args.split - 1)))

job = Job(job_name,
          executable=script_file,
          output=output,
          error=error,
          log=log,
          submit=submit,
          request_memory=args.memory,
          request_disk=args.disk,
          extra_lines=transfer_lines,
          verbose=2 if args.verbose else 0)

job_args = [tof_script] + args.args + [
    "--shells 1", "--rmin", str(r), "--rmax", str(r)
]
if args.split > 1:
    job_args += ["--subset", str(sub)]
job.add_arg(" ".join(job_args))
dag.add_job(job)

# Write all necessary submit files and submit dagman to Condor
if args.maxjobs > 0:
    dag.build_submit(submit_options="-maxjobs " + str(args.maxjobs))
else:
    dag.build_submit()