def test_other_does_not_contain_child():
    """A layer created on one DAG is not contained in an unrelated DAG."""
    owner = dags.DAG()
    stranger = dags.DAG()

    child = owner.layer(name="a_child")

    assert child not in stranger
def test_other_does_not_contain_child_even_if_same_name():
    """Containment is by identity, not name: same-named layers on different
    DAGs are distinct and do not cross-contain."""
    first = dags.DAG()
    second = dags.DAG()

    first_child = first.layer(name="child")
    second_child = second.layer(name="child")

    assert first_child not in second
    assert second_child not in first
def test_jobstate_log_as_path(dag_dir):
    """A Path given as jobstate_log is written as a JOBSTATE_LOG command
    using its POSIX form."""
    log_path = Path("i_am_the_jobstate.log").absolute()
    dag = dags.DAG(jobstate_log=log_path)

    dag.write(dag_dir)

    expected = f"JOBSTATE_LOG {log_path.as_posix()}"
    assert expected in dagfile_lines(dag_dir)
def test_dagman_job_attributes_with_two_attrs(dag_dir):
    """Each entry in dagman_job_attributes becomes its own SET_JOB_ATTR line."""
    dag = dags.DAG(dagman_job_attributes={"foo": "bar", "wizard": 17})

    dag.write(dag_dir)

    contents = dagfile_lines(dag_dir)
    # Separate asserts (instead of `assert all((...))`) so a failure reports
    # exactly which attribute line is missing.
    assert "SET_JOB_ATTR foo = bar" in contents
    assert "SET_JOB_ATTR wizard = 17" in contents
def test_jobstate_log_as_str(dag_dir):
    """A plain-string jobstate_log is written verbatim in the JOBSTATE_LOG command."""
    log_name = "i_am_the_jobstate.log"
    dag = dags.DAG(jobstate_log=log_name)

    dag.write(dag_dir)

    assert f"JOBSTATE_LOG {log_name}" in dagfile_lines(dag_dir)
def test_dot_config_not_default(dag_dir):
    """Non-default DotConfig options are all reflected in the DOT command."""
    dot_config = dags.DotConfig(
        "dag.dot",
        update=True,
        overwrite=False,
        include_file="include-me.dot",
    )
    dag = dags.DAG(dot_config=dot_config)

    dag.write(dag_dir)

    expected = "DOT dag.dot UPDATE DONT-OVERWRITE INCLUDE include-me.dot"
    assert expected in dagfile_lines(dag_dir)
def test_dag_contains_child():
    """A layer is contained in the DAG that created it."""
    parent = dags.DAG()

    child = parent.layer(name="a_child")

    assert child in parent
#!/usr/bin/env python from pathlib import Path import glob import htcondor import htcondor_dags as dags analysis_dag = dags.DAG() # This is the "count words in chunk" step, which now lives in the sub-DAG. # The split will have run by the time this code executes. # Therefore, we can inspect the directory to find out how many chunks were created. # determine the number of files in this directory that match the pattern num_chunks = len(glob.glob("words_*.txt")) count_words = analysis_dag.layer( name="count_words", submit_description=htcondor.Submit({ "executable": "count_words.py", "arguments": "$(word_set)", "transfer_input_files": "words_$(word_set).txt", "output": "count_words_$(word_set).out", "error": "count_words_$(word_set).err", }), vars=[{ "word_set": str(n) } for n in range(num_chunks)], )
def test_max_jobs_per_category_with_two_categories(dag_dir):
    """Each entry in max_jobs_by_category becomes its own CATEGORY line."""
    dag = dags.DAG(max_jobs_by_category={"foo": 5, "bar": 10})

    dag.write(dag_dir)

    contents = dagfile_lines(dag_dir)
    # Separate asserts (instead of `assert all((...))`) so a failure reports
    # exactly which category line is missing.
    assert "CATEGORY foo 5" in contents
    assert "CATEGORY bar 10" in contents
def test_max_jobs_per_category_with_one_category(dag_dir):
    """A single max_jobs_by_category entry produces a CATEGORY command."""
    dag = dags.DAG(max_jobs_by_category={"foo": 5})

    dag.write(dag_dir)

    expected = "CATEGORY foo 5"
    assert expected in dagfile_lines(dag_dir)
def test_dagman_job_attributes_with_one_attr(dag_dir):
    """A single dagman_job_attributes entry produces a SET_JOB_ATTR command."""
    dag = dags.DAG(dagman_job_attributes={"foo": "bar"})

    dag.write(dag_dir)

    expected = "SET_JOB_ATTR foo = bar"
    assert expected in dagfile_lines(dag_dir)
def test_config_file_has_right_contents(dag_dir):
    """dagman_config entries are written as `KEY = VALUE` lines in the config file."""
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dag.write(dag_dir)

    config_lines = (dag_dir / dags.CONFIG_FILE_NAME).read_text().splitlines()
    assert "DAGMAN_MAX_JOBS_IDLE = 10" in config_lines
def test_config_command_gets_written_if_config_given(dag_dir):
    """Providing dagman_config adds a CONFIG command line to the DAG file."""
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dag.write(dag_dir)

    dag_file_text = (dag_dir / dags.DEFAULT_DAG_FILE_NAME).read_text()
    assert f"\nCONFIG {dags.CONFIG_FILE_NAME}\n" in dag_file_text
#!/usr/bin/env python from pathlib import Path import htcondor import htcondor_dags as dags top_layer_dag = dags.DAG() # This is the "split" step. It stays in the top-level DAG. # Note that split_words.py no longer takes arguments. It determines the number # of chunks itself. split_words = top_layer_dag.layer( name="split_words", submit_description=htcondor.Submit({ "executable": "split_words.py", "transfer_input_files": "words.txt", "output": "split_words.out", "error": "split_words.err", }), post=dags.Script(executable="make_analysis_dag.py"), ) analysis_subdag = split_words.child_subdag(name="analysis", dag_file="analysis.dag") # Write out the DAG. # Now that we're going to have two DAG input files in this directory, we need # to give them unique names. this_dir = Path(__file__).parent top_layer_dag.write(this_dir, dag_file_name="top_level.dag")
#!/usr/bin/env python from pathlib import Path import htcondor import htcondor_dags as dags # We will split words.txt into five chunks. NUM_CHUNKS = 5 # Start by creating the DAG object itself. # This object "holds" the DAG information. # Meta-information like DAGMan configuration, the location of the node status # file, etc., lives on this object. # It's methods are used to create node layers and possibly subDAGs. diamond = dags.DAG() # This is the "split" step. # It has no parent layer, so it is a root layer of the DAG. # Root layers are created from the DAG object itself. split_words = diamond.layer( name="split_words", submit_description=htcondor.Submit({ "executable": "split_words.py", "arguments": str(NUM_CHUNKS), "transfer_input_files": "words.txt", "output": "split_words.out", "error": "split_words.err", }), )
def test_empty_dag_writes_empty_dagfile(dag_dir):
    """Writing a DAG with no layers produces a file containing only comments."""
    dag = dags.DAG()

    dag.write(dag_dir)

    # if there are any lines in the file, they must be comments
    non_comment_lines = [
        line for line in dagfile_lines(dag_dir) if not line.startswith("#")
    ]
    assert not non_comment_lines
def test_node_status_file_not_default(dag_dir):
    """Non-default NodeStatusFile options appear in the NODE_STATUS_FILE command.

    Bug fix: the original asserted a bare non-empty string literal, which is
    always truthy, so the test could never fail. The string must be checked
    against the written DAG file's lines.
    """
    dag = dags.DAG(node_status_file=dags.NodeStatusFile(
        "node_status_file", update_time=60, always_update=True))

    dag.write(dag_dir)

    expected = "NODE_STATUS_FILE node_status_file 60 ALWAYS-UPDATE"
    assert expected in dagfile_lines(dag_dir)
def test_node_status_file_default(dag_dir):
    """A default NodeStatusFile produces a plain NODE_STATUS_FILE command.

    Bug fix: the original asserted a bare non-empty string literal, which is
    always truthy, so the test could never fail. The string must be checked
    against the written DAG file's lines.
    """
    dag = dags.DAG(node_status_file=dags.NodeStatusFile("node_status_file"))

    dag.write(dag_dir)

    assert "NODE_STATUS_FILE node_status_file" in dagfile_lines(dag_dir)
def test_dot_config_default(dag_dir):
    """A DotConfig with default options yields DONT-UPDATE OVERWRITE in the DOT command."""
    dag = dags.DAG(dot_config=dags.DotConfig("dag.dot"))

    dag.write(dag_dir)

    expected = "DOT dag.dot DONT-UPDATE OVERWRITE"
    assert expected in dagfile_lines(dag_dir)
def test_config_file_gets_written_if_config_given(dag_dir):
    """Providing dagman_config causes the separate config file to be created."""
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dag.write(dag_dir)

    config_path = dag_dir / dags.CONFIG_FILE_NAME
    assert config_path.exists()
def dag():
    """Build and return a fresh, empty DAG.

    NOTE(review): this looks like a pytest fixture body, but no
    @pytest.fixture decorator is visible here — confirm against the full file.
    """
    empty_dag = dags.DAG()
    return empty_dag