def test_add_job_int_fail():
    # Adding something that is neither a Job nor a Dagman (here an int)
    # must raise a TypeError carrying the expected message.
    bad_node = 50
    with pytest.raises(TypeError) as excinfo:
        dagman = Dagman('dagname')
        dagman.add_job(bad_node)
    expected_message = ('Expecting a Job or Dagman. '
                        'Got an object of type {}'.format(type(bad_node)))
    assert expected_message == str(excinfo.value)
def test_dagman_dag_parameter(tmpdir):
    # A Dagman constructed with the dag parameter should end up inside that
    # Dagman (i.e. it is added as a subdag).
    submit_path = str(tmpdir.join('submit'))
    parent = Dagman('dagman', submit=submit_path)
    child = Dagman('subdag', submit=submit_path, dag=parent)
    assert child in parent
def test_add_job_dag_parameter_equality(tmpdir):
    # A Job attached through the dag parameter and a Job attached through
    # add_job should both be listed in dag.nodes, in that order.
    submit_path = str(tmpdir.join('submit'))
    parent_dag = Dagman('dagman', submit=submit_path)

    via_parameter = Job('job_1', example_script, dag=parent_dag)
    via_method = Job('job_2', example_script)
    parent_dag.add_job(via_method)

    expected_nodes = [via_parameter, via_method]
    assert parent_dag.nodes == expected_nodes
def test_repr():
    # Dagman built with defaults: the repr is expected to report the
    # current working directory as the submit directory and zero nodes.
    plain_dag = Dagman('dagname')
    actual = repr(plain_dag)
    expected = ('Dagman(name=dagname, n_nodes=0, '
                'submit={})'.format(os.getcwd()))
    assert actual == expected

    # Dagman with an explicit submit directory holding one subdag node.
    custom_dag = Dagman('dagname', submit='/submit_dir')
    custom_dag.add_subdag(plain_dag)
    actual = repr(custom_dag)
    expected = 'Dagman(name=dagname, n_nodes=1, submit=/submit_dir)'
    assert actual == expected
def test_get_job_arg_lines_not_built_raises():
    # Asking a Dagman for the arg lines of a Job that has not been built
    # must raise a ValueError naming the offending Job.
    unbuilt_job = Job('testjob', example_script)
    with pytest.raises(ValueError) as excinfo:
        Dagman('dag_name')._get_job_arg_lines(unbuilt_job, fancyname=True)
    template = 'Job {} must be built before adding it to a Dagman'
    expected_message = template.format(unbuilt_job.name)
    assert expected_message == str(excinfo.value)
def test_job_dag_parameter(tmpdir):
    # A Job constructed with the dag parameter should end up inside that
    # Dagman.
    submit_path = str(tmpdir.join('submit'))
    parent = Dagman('dagman', submit=submit_path)
    member = Job('job', example_script, dag=parent)
    assert member in parent
def test_job_arg_name_files(tmpdir, fancyname):
    # Regression test for issue #47.
    # When a named argument is added to a Job, and the Job is built with
    # fancyname=True, the Job submit file and the error/log/output files
    # for the argument must start with the same index.
    # E.g. job_(date)_01.submit, job_(date)_01.error, etc.
    submit_dir = str(tmpdir.mkdir('submit'))

    named_job = Job('testjob', example_script, submit=submit_dir)
    named_job.add_arg('arg', name='argname')
    dag = Dagman('exampledagman', submit=submit_dir)
    dag.add_job(named_job)
    dag.build(fancyname=fancyname)

    with open(dag.submit_file, 'r') as handle:
        lines = handle.readlines()

    # Root of the submit file named on the first line of the Dagman submit
    # file (its basename without the extension)
    submit_basename = lines[0].split(os.sep)[-1].rstrip()
    submit_root, _ = os.path.splitext(submit_basename)

    # job_name variable from the third line (used to build the
    # error/log/output file basenames), minus its last '_'-separated field
    job_name = lines[2].split('"')[-2]
    other_root = job_name.rpartition('_')[0]

    assert submit_root == other_root
def test_add_subdag_dag_parameter_equality(tmpdir):
    # A subdag attached through the dag parameter and a subdag attached
    # through add_subdag should both be listed in dag.nodes, in that order.
    submit_path = str(tmpdir.join('submit'))
    parent_dag = Dagman('dagman', submit=submit_path)

    via_parameter = Dagman('subdag_1', submit=submit_path, dag=parent_dag)
    via_method = Dagman('subdag_2', submit=submit_path)
    parent_dag.add_subdag(via_method)

    expected_nodes = [via_parameter, via_method]
    assert parent_dag.nodes == expected_nodes
def test_dagman_subdag_build(tmpdir):
    # Building a Dagman that contains a subdag should write the extra lines
    # into the submit file of the parent Dagman AND into the submit file of
    # the subdag.
    submit_dir = str(tmpdir.join('submit'))

    extra_lines = ['first extra line', 'second extra line']
    dagman = Dagman('dagman', submit=submit_dir, extra_lines=extra_lines)
    subdag = Dagman('subdag_1', submit=submit_dir, extra_lines=extra_lines)
    dagman.add_subdag(subdag)
    dagman.build()

    # Inspect each Dagman's own submit file, so the subdag's file is
    # verified as well as the parent's.
    for dag in (dagman, subdag):
        with open(dag.submit_file, 'r') as f:
            written_lines = {line.rstrip('\n') for line in f}
        assert set(extra_lines) <= written_lines
def test_dagman_env_variable_dir(tmpdir, monkeypatch):
    # Point the pycondor environment variable at a temporary directory
    env_submit_dir = str(tmpdir.mkdir('submit'))
    monkeypatch.setenv('PYCONDOR_SUBMIT_DIR', env_submit_dir)

    dag = Dagman('testdagman')
    dag.add_job(Job('jobname', example_script))
    dag.build()

    # The built submit file should sit in the directory named by the
    # environment variable
    assert env_submit_dir == os.path.dirname(dag.submit_file)
def test_dagman_has_bad_node_names(tmpdir):
    submit_dir = str(tmpdir.mkdir('submit'))

    # Test all combinations:
    # (job name, argument name, expected value of _has_bad_node_names)
    cases = [
        ('testjob', 'argname', False),
        ('testjob.', 'argname', True),
        ('testjob', 'argname+', True),
        ('testjob+', 'argname.', True),
    ]
    for job_name, arg_name, expected_flag in cases:
        job = Job(job_name, example_script, submit=submit_dir)
        job.add_arg('arg', name=arg_name)
        dag = Dagman('testdagman', submit=submit_dir)
        dag.add_job(job)
        dag.build()
        assert dag._has_bad_node_names == expected_flag
output_dirname = os.path.dirname(args.args[output_index]) logfile_index = -1 if "-l" in args.args: logfile_index = args.args.index("-l") + 1 logfile_name = os.path.basename(args.args[logfile_index]) logfile_dirname = os.path.dirname(args.args[logfile_index]) # Declare the error, output, log, and submit directories for Condor job error = '/scratch/fasig/pycondor' output = '/scratch/fasig/pycondor' log = '/scratch/fasig/pycondor' submit = '/scratch/fasig/pycondor' # Setting up the PyCondor Dagman dag = Dagman(descriptive_name, submit=submit, verbose=2 if args.verbose else 0) # Adding arguments to jobs for energy in args.energies: for i in range(args.iterations): transfer_files = [] file_remaps = [] if output_index != -1: replaced_name = output_name.replace("ENERGY", energy) replaced_name = replaced_name.replace("ITERATION", str(i).zfill(zfill_amount)) args.args[output_index] = replaced_name transfer_files.append(replaced_name) file_remaps.append(replaced_name + '=' + os.path.join(output_dirname, replaced_name)) if logfile_index != -1:
def test_dagman_job_order(tmpdir):
    # The order in which Jobs are added to a Dagman must not change the
    # Dagman submit file that is built. See issue #57.
    submit_dir = str(tmpdir.mkdir('submit'))

    submit_lines_per_order = []
    for parent_first in (True, False):
        dag = Dagman('testdagman', submit=submit_dir)

        child = Job('childjob', example_script, submit=submit_dir)
        child.add_arg('--length 200', name='200jobname')
        child.add_arg('--length 400', retry=3)

        parent = Job('parentjob', example_script, submit=submit_dir)
        parent.add_arg('--length 100')
        parent.add_child(child)

        # First pass adds the parent first, second pass adds the child first
        insertion_order = [parent, child] if parent_first else [child, parent]
        for node in insertion_order:
            dag.add_job(node)

        dag.build(fancyname=False)
        # Remember the submit file lines produced for this insertion order
        with open(dag.submit_file, 'r') as submit_file:
            submit_lines_per_order.append(list(submit_file))

    # The same lines must occur in the Dagman submit file regardless of
    # which of the parent/child jobs was added first
    lines_parent_first = submit_lines_per_order[0]
    lines_child_first = submit_lines_per_order[1]
    assert Counter(lines_parent_first) == Counter(lines_child_first)
def dagman(tmpdir_factory):
    # Hand back a Dagman named 'exampledagman' whose submit directory is a
    # temporary directory created through tmpdir_factory.
    return Dagman('exampledagman',
                  submit=str(tmpdir_factory.mktemp('submit')))
def generate_dag(times, flags=None, tag='gwdetchar-omega-batch',
                 submit=False, outdir=None, universe='vanilla',
                 condor_commands=None):
    """Construct a Directed Acyclic Graph (DAG) for a batch of omega scans

    Parameters
    ----------
    times : `list` of `float`
        list of GPS times to scan

    flags : `list` of `str`, optional
        a list of command line flags to run for each job, defaults to
        an empty list

    tag : `str`, optional
        a helpful string to use to name the DAG,
        default: `'gwdetchar-omega-batch'`

    submit : `bool`, optional
        submit the DAG to condor, default: `False`

    outdir : `str`, optional
        the output directory in which to store files, will result in
        sub-directories called `'condor'` and `'logs'`, default: the
        current working directory (`os.getcwd`) at call time

    universe : `str`, optional
        condor universe to run in, default: `'vanilla'`

    condor_commands : `list` of `str`, optional
        list of condor settings to process with, defaults to the output of
        `get_condor_arguments`, evaluated at call time

    Returns
    -------
    dagman : `~pycondor.Dagman`
        the fully built DAG object
    """
    # resolve defaults per call rather than once at definition time, so
    # that no mutable default is shared between calls and a changed working
    # directory is picked up
    flags = [] if flags is None else list(flags)
    if outdir is None:
        outdir = os.getcwd()
    if condor_commands is None:
        condor_commands = get_condor_arguments()

    logdir = os.path.join(outdir, 'logs')
    subdir = os.path.join(outdir, 'condor')
    executable = find_executable('gwdetchar-omega')
    # create DAG and jobs
    dagman = Dagman(name=tag, submit=subdir)
    job = Job(dag=dagman, name=os.path.basename(executable),
              executable=executable, universe=universe,
              submit=subdir, error=logdir, output=logdir,
              getenv=True,
              request_memory=4096 if universe != "local" else None,
              extra_lines=condor_commands)
    # make a node in the workflow for each event time
    for t in times:
        cmd = " ".join([str(t)] + [
            "--output-directory", os.path.join(outdir, str(t))] + flags)
        # node names replace "." with "_"
        job.add_arg(cmd, name=str(t).replace(".", "_"))
    # write and submit the DAG
    dagman.build(fancyname=False)
    print("Workflow generated for {} times".format(len(times)))
    if submit:
        dagman.submit_dag(submit_options="-force")
        print("Submitted to condor, check status via:\n\n"
              "$ condor_q {}".format(getuser()))
    else:
        print(
            "Submit to condor via:\n\n"
            "$ condor_submit_dag {0.submit_file}".format(dagman),
        )
    return dagman
def test_get_job_arg_lines_non_job_raises():
    # Passing something that is not a Job to _get_job_arg_lines must raise
    # a TypeError that reports the type that was received.
    bogus_node = 'not a job'
    with pytest.raises(TypeError) as excinfo:
        Dagman('dag_name')._get_job_arg_lines(bogus_node, fancyname=True)
    expected_message = 'Expecting a Job object, got {}'.format(
        type(bogus_node))
    assert expected_message == str(excinfo.value)
for filename in sorted(os.listdir(dirname)): filename = filename[:filename.rindex("_")] if filename.startswith(basename) and not (filename in file_bases): file_bases.append(filename) for base in file_bases: arguments = os.path.join(os.path.dirname(args.noise_file_basename), base) arguments += " " + str(args.outfile) arguments += " --range " + str(args.range[0]) + " " + str(args.range[1]) arguments += " --stations " + str(args.stations) arguments += " --geometry " + str(args.geometry) arguments += " --threshold " + str(args.threshold) arguments += " --tot " + str(args.tot) arguments += " --antennas_hit " + str(args.antennas_hit) calculator_job.add_arg(arguments) culminator_job.add_arg(args.outfile) # Create job dependencies # culminator_job doesn't start until calculator_job has finished calculator_job.add_child(culminator_job) # Set up a dagman dagman = Dagman("full_calculation_" + basename, submit=submit, verbose=2) # Add jobs to dagman dagman.add_job(calculator_job) dagman.add_job(culminator_job) # Write all necessary submit files and submit job to Condor dagman.build_submit()
def generate_dag(times, flags=None, tag='gwdetchar-omega-batch',
                 submit=False, outdir=None, universe='vanilla',
                 condor_commands=None):
    """Construct a Directed Acyclic Graph (DAG) for a batch of omega scans

    Parameters
    ----------
    times : `list` of `float`
        list of GPS times to scan

    flags : `list` of `str`, optional
        a list of command line flags to run for each job, defaults to
        an empty list

    tag : `str`, optional
        a helpful string to use to name the DAG,
        default: `'gwdetchar-omega-batch'`

    submit : `bool`, optional
        submit the DAG to condor, default: `False`

    outdir : `str`, optional
        the output directory in which to store files, will result in
        sub-directories called `'condor'` and `'logs'`, default: the
        current working directory (`os.getcwd`) at call time

    universe : `str`, optional
        condor universe to run in, default: `'vanilla'`

    condor_commands : `list` of `str`, optional
        list of condor settings to process with, defaults to the output of
        `get_condor_arguments`, evaluated at call time

    Returns
    -------
    dagman : `~pycondor.Dagman`
        the fully built DAG object
    """
    # defaults are resolved on every call: a default evaluated in the
    # signature would be computed once, at definition time, and shared
    if flags is None:
        flags = []
    else:
        flags = list(flags)
    if outdir is None:
        outdir = os.getcwd()
    if condor_commands is None:
        condor_commands = get_condor_arguments()

    logdir = os.path.join(outdir, 'logs')
    subdir = os.path.join(outdir, 'condor')
    executable = find_executable('gwdetchar-omega')
    # create DAG and jobs
    dagman = Dagman(name=tag, submit=subdir)
    job = Job(
        dag=dagman,
        name=os.path.basename(executable),
        executable=executable,
        universe=universe,
        submit=subdir,
        error=logdir,
        output=logdir,
        getenv=True,
        request_memory=4096 if universe != "local" else None,
        extra_lines=condor_commands
    )
    # make a node in the workflow for each event time
    for t in times:
        cmd = " ".join([str(t)] + [
            "--output-directory", os.path.join(outdir, str(t))] + flags)
        # node names replace "." with "_"
        job.add_arg(cmd, name=str(t).replace(".", "_"))
    # write and submit the DAG
    dagman.build(fancyname=False)
    print("Workflow generated for {} times".format(len(times)))
    if submit:
        dagman.submit_dag(submit_options="-force")
        print(
            "Submitted to condor, check status via:\n\n"
            "$ condor_q {}".format(getuser())
        )
    else:
        print(
            "Submit to condor via:\n\n"
            "$ condor_submit_dag {0.submit_file}".format(dagman),
        )
    return dagman
arguments += " --number " + str(args.number) arguments += " --size " + str(args.size) arguments += " --time " + str(args.time) arguments += " --dt " + str(args.dt) arguments += " --rms " + str(args.rms) generator_job.add_arg(arguments) if add_file_indices: files = [ filename + "_" + suffix + ".npz" for suffix in output_suffixes ] else: files = [filename + ".npz"] arguments = " ".join(files) arguments += " --output " + str(args.envelope) arguments += " --amplification " + str(args.amplification) processor_job.add_arg(arguments) # Create job dependencies # processor_job doesn't start until generator_job has finished generator_job.add_child(processor_job) # Set up a dagman dagman = Dagman("generate_" + basename, submit=submit, verbose=2) # Add jobs to dagman dagman.add_job(generator_job) dagman.add_job(processor_job) # Write all necessary submit files and submit job to Condor dagman.build_submit()
def main():
    """Build an HTCondor DAG computing the pair-wise overlap of skymaps.

    Parses the command line, creates the directory that receives the
    overlap output files, adds one ``compute_overlap`` job argument for
    every unordered pair of skymaps (only when at least two skymaps were
    given) and finally builds the DAG with ``fancyname=False``.
    """
    parser = argparse.ArgumentParser(
        description="Compute pair-wise overlap of a batch of skymaps")
    parser.add_argument(
        "--skymap", metavar="PATH", action="append",
        help="A list of paths pointing to the probability skymaps")
    parser.add_argument(
        "--accounting-tag", type=str,
        default="ligo.dev.o3.cbc.lensing.multi", help="Accounting tag")
    parser.add_argument(
        "--slurm", action="store_true",
        help="Run on a condor+slurm cluster")
    parser.add_argument(
        "--plot", action="store_true", help="Visualize the skymaps")
    parser.add_argument(
        "--verbose", action="store_true", help="Be very verbose")
    args = parser.parse_args()

    # With action="append" the attribute stays None when --skymap is never
    # passed; treat that as "no skymaps" instead of failing in len()
    skymaps = args.skymap or []

    compute_overlap_job_name = "compute_overlap"
    pairwise_overlap_out_str = "{prefix_1}_{prefix_2}_overlap.dat"

    # Directories for HTCondor
    try:
        os.makedirs(compute_overlap_job_name)
    except OSError:
        # Best effort (e.g. the directory already exists); only OS-level
        # errors are ignored so Ctrl-C / SystemExit still propagate
        pass
    error = os.path.abspath("logs")
    output = os.path.abspath("logs")
    log = os.path.abspath("logs")
    submit = os.path.abspath("")

    # Create a DAG (but actually each node is independent of each other)
    dag = Dagman(
        name="dag_compute_overlap_from_skymaps",
        submit=submit,
    )

    universe = "vanilla"
    extra_lines = ["accounting_group = {}".format(args.accounting_tag)]
    if args.slurm:
        universe = "grid"
        extra_lines.append("grid_resource = batch slurm")

    # Compute overlap
    if len(skymaps) >= 2:
        # At least two skymaps, now we can compute the pairwise overlap
        compute_overlap_job = Job(
            name="job_" + compute_overlap_job_name,
            executable=shutil.which("compute_overlap"),
            universe=universe,
            error=error,
            output=output,
            log=log,
            dag=dag,
            extra_lines=extra_lines,
        )

        # Flags shared by every pair; built once outside the loop
        common_flags = ""
        if args.verbose:
            common_flags += " --verbose"
        if args.plot:
            common_flags += " --plot"

        for skymap_1, skymap_2 in itertools.combinations(skymaps, 2):
            prefix_1 = get_filename_prefix(skymap_1)
            prefix_2 = get_filename_prefix(skymap_2)
            output_path = os.path.abspath(os.path.join(
                compute_overlap_job_name,
                pairwise_overlap_out_str.format(
                    prefix_1=prefix_1, prefix_2=prefix_2),
            ))

            argument_str = (
                common_flags
                + " --skymap " + os.path.abspath(skymap_1)
                + " --skymap " + os.path.abspath(skymap_2)
                + " --output " + output_path
            )
            compute_overlap_job.add_arg(argument_str, retry=3)

    dag.build(fancyname=False)