Example #1
def submit_outer_dag(
    working_dir: Path,
    source_dir: Path,
    dest_dir: Path,
    requirements: Optional[str] = None,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):

    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    working_dir = working_dir.resolve()
    dest_dir = dest_dir.resolve()

    working_dir.mkdir(parents=True, exist_ok=True)
    dest_dir.mkdir(parents=True, exist_ok=True)

    transfer_manifest_path = dest_dir / "transfer_manifest.txt"

    outer_dag = make_outer_dag(
        dest_dir,
        requirements,
        source_dir,
        test_mode,
        transfer_manifest_path,
        unique_id,
        working_dir,
    )

    if requirements:
        (working_dir / "requirements.txt").write_text(requirements)

    outer_dag_file = dags.write_dag(outer_dag,
                                    dag_dir=working_dir,
                                    dag_file_name="outer.dag")

    dag_args = {'force': 1}
    sub = htcondor.Submit.from_dag(str(outer_dag_file), dag_args)

    with change_dir(working_dir):
        schedd = htcondor.Schedd()
        with schedd.transaction() as txn:
            return sub.queue(txn)
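Note that the transaction-based queueing shown here was removed from the HTCondor Python bindings in version 10; under the newer API, the last few lines of the function above would instead read roughly like this sketch (reusing sub and working_dir from the function).

    # Sketch only: Schedd.submit() accepts the Submit object directly and
    # returns a SubmitResult carrying the new cluster ID.
    with change_dir(working_dir):
        schedd = htcondor.Schedd()
        result = schedd.submit(sub)
        return result.cluster()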
Example #2
def submit_outer_dag(
    direction: TransferDirection,
    working_dir: Path,
    local_dir: Path,
    remote_dir: Path,
    requirements: Optional[str] = None,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
) -> int:
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    working_dir = working_dir.resolve()
    local_dir = local_dir.resolve()

    working_dir.mkdir(parents=True, exist_ok=True)
    local_dir.mkdir(parents=True, exist_ok=True)

    outer_dag = make_outer_dag(
        direction=direction,
        local_dir=local_dir,
        remote_dir=remote_dir,
        working_dir=working_dir,
        requirements=requirements,
        unique_id=unique_id,
        test_mode=test_mode,
    )

    outer_dag_file = dags.write_dag(outer_dag,
                                    dag_dir=working_dir,
                                    dag_file_name=OUTER_DAG_NAME)

    sub = htcondor.Submit.from_dag(str(outer_dag_file), DAG_ARGS)

    with change_dir(working_dir):
        schedd = htcondor.Schedd()
        with schedd.transaction() as txn:
            return sub.queue(txn)
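For reference, this variant returns the ClusterId of the submitted DAGMan job. A minimal, hypothetical invocation might look like the sketch below; the paths, the PULL direction, and the unique_id are placeholders rather than values from the source project.

from pathlib import Path

# Hypothetical example call; adjust the directories to the actual sync job.
cluster_id = submit_outer_dag(
    direction=TransferDirection.PULL,
    working_dir=Path("./sync-work"),
    local_dir=Path("./local-data"),
    remote_dir=Path("/remote/data"),
    unique_id="example-sync",
)
print(f"Submitted outer DAG as cluster {cluster_id}")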
Example #3
def create_dag_file(dag_graph, dag_dir_name, information_dict):
    """

    :param dag_graph: a DAG networkx graph representing the dependencies between the different jobs,
                        where a job is specified by 'job_name'
    :param dag_dir_name: Directory for the dag. Will be overwritten.
    :param information_dict: a dictionary of dictionaries: has a key for each 'job_name'.
            in information_dict['job_name'] there are keys for
                the python script path (py_script_path)
                the batch parameters (batch_parameters). Will be set to [] by default
                'kargs_dict' is a dictionary holding all parameters for running a job as specified in send_job scripts.

    :return:
    """
    nodes = list(nx.topological_sort(dag_graph))
    dag = dags.DAG()
    for job_name in nodes:
        job_submit = create_job_submit_format_from_python_script(
            information_dict[job_name]['py_script_path'], job_name,
            **information_dict[job_name]['kargs_dict'])
        if 'batch_parameters' not in information_dict[job_name]:
            information_dict[job_name]['batch_parameters'] = [{}]
        layer = dag.layer(name=job_name,
                          submit_description=job_submit,
                          vars=information_dict[job_name]['batch_parameters'])
        for parent in dag_graph.predecessors(job_name):
            layer.add_parents(dag.glob(parent))

    print(dag.describe())
    # Remove any previous DAG directory so the DAG is written out fresh,
    # then recreate it for dags.write_dag.
    dag_dir = exec_dir + dag_dir_name
    shutil.rmtree(dag_dir, ignore_errors=True)
    os.makedirs(dag_dir, exist_ok=True)
    dag_file = dags.write_dag(dag, dag_dir)
    return dag_file
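To make the expected inputs concrete, a hypothetical call could look like the sketch below; the job names, script paths, and batch parameters are invented for illustration, and create_job_submit_format_from_python_script is assumed to come from the same project.

import networkx as nx

# Dependency graph: "train" runs after "preprocess", "evaluate" after "train".
graph = nx.DiGraph()
graph.add_edge("preprocess", "train")
graph.add_edge("train", "evaluate")

info = {
    "preprocess": {"py_script_path": "preprocess.py", "kargs_dict": {}},
    "train": {
        "py_script_path": "train.py",
        "kargs_dict": {},
        # One DAG node is created per vars dict in the layer.
        "batch_parameters": [{"seed": "1"}, {"seed": "2"}],
    },
    "evaluate": {"py_script_path": "evaluate.py", "kargs_dict": {}},
}

dag_file = create_dag_file(graph, "example_dag", info)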
Example #4
File: testing.py  Project: cwinpy/cwinpy
    def __init__(
        self,
        prior,
        ninj=100,
        maxamp=None,
        basedir=None,
        detector="AH1",
        submit=False,
        accountuser=None,
        accountgroup=None,
        getenv=False,
        sampler="dynesty",
        sampler_kwargs=None,
        freqrange=(10.0, 750.0),
        outputsnr=True,
        numba=False,
    ):

        if isinstance(prior, dict):
            self.prior = bilby.core.prior.PriorDict(dictionary=prior)
        else:
            raise TypeError("Prior must be a dictionary-type object")

        if ninj < 1:
            raise ValueError("A positive number of injection must be given")
        self.ninj = int(ninj)

        # set maximum amplitude if given
        self.maxamp = None
        if isinstance(maxamp, float):
            if maxamp > 0.0:
                self.maxamp = maxamp
            else:
                raise ValueError("Maximum amplitude must be positive")

        if basedir is not None:
            self.basedir = basedir
            self.makedirs(basedir)
        else:
            self.basedir = os.getcwd()

        # build output directory structure
        self.detector = detector
        if isinstance(self.detector, str):
            self.detector = [self.detector]
        if not isinstance(self.detector, list):
            raise TypeError("Detector must be a string or list of strings")

        # posterior sample results directory
        self.resultsdir = os.path.join(self.basedir, "results")
        self.makedirs(self.resultsdir)

        # create pulsar parameter files
        self.create_pulsars(freqrange=freqrange)

        # create dag configuration file
        self.accountuser = accountuser
        self.accountgroup = accountgroup
        self.getenv = getenv
        self.sampler = sampler
        self.sampler_kwargs = sampler_kwargs
        self.outputsnr = outputsnr
        self.numba = numba
        self.create_config()

        # create the DAG for cwinpy_pe jobs
        self.runner = pe_pipeline(config=self.config, build=False)

        # add PP plot creation DAG
        self.ppplots()

        # build and submit the DAG
        # write out the DAG and submit files
        submitdir = os.path.join(self.basedir, "submit")
        if not os.path.exists(submitdir):
            os.makedirs(submitdir)

        dagname = "cwinpy_pe_pp_plot"
        dag_file = write_dag(self.runner.dag, submitdir, dag_file_name=f"{dagname}.dag")

        # submit the DAG if requested
        if submit:
            submit_dag(dag_file)
Example #5
from pathlib import Path

import htcondor
from htcondor import dags

top_layer_dag = dags.DAG()

# This is the "split" step. It stays in the top-level DAG.
# Note that split_words.py no longer takes arguments. It determines the number
# of chunks itself.
split_words = top_layer_dag.layer(
    name="split_words",
    submit_description=htcondor.Submit({
        "executable": "split_words.py",
        "transfer_input_files": "words.txt",
        "output": "split_words.out",
        "error": "split_words.err",
    }),
    post=dags.Script(executable="make_analysis_dag.py"),
)

analysis_subdag = split_words.child_subdag(name="analysis",
                                           dag_file="analysis.dag")

# Write out the DAG.
# Now that we're going to have two DAG input files in this directory, we need
# to give them unique names.
this_dir = Path(__file__).parent
dags.write_dag(top_layer_dag, this_dir, dag_file_name="top_level.dag")
print(f"Wrote DAG files to {this_dir}")
Example #6
def write_inner_dag(
    direction: TransferDirection,
    remote_prefix: Path,
    remote_manifest: Path,
    local_prefix: Path,
    requirements=None,
    test_mode: bool = False,
    unique_id=None,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    logging.info(
        "Generating SUBGDAG for transfer of %s->%s",
        remote_prefix,
        local_prefix,
    )

    logging.info("Parsing remote file manifest...")

    remote_files = parse_file_manifest(remote_prefix, remote_manifest)

    logging.info("Generating local file manifest...")

    local_manifest_path = Path(LOCAL_MANIFEST_FILE_NAME)
    create_file_manifest(local_prefix, local_manifest_path)
    local_files = parse_file_manifest(local_prefix, local_manifest_path)

    transfer_manifest_path = local_prefix / TRANSFER_MANIFEST_FILE_NAME
    transfer_manifest_path.parent.mkdir(parents=True, exist_ok=True)
    transfer_manifest_path.touch(exist_ok=True)

    # Never transfer the transfer manifest
    transfer_manifest_file = transfer_manifest_path.relative_to(local_prefix)

    local_files.pop(transfer_manifest_file, None)
    remote_files.pop(transfer_manifest_file, None)

    if direction is TransferDirection.PULL:
        src_files, dest_files = remote_files, local_files
    else:  # This is a PUSH
        src_files, dest_files = local_files, remote_files

    files_to_transfer = {
        fname
        for fname, size in src_files.items()
        if size != dest_files.get(fname, -1)
    }

    # TODO: rethink this logic for push vs. pull
    # Check for files that we have already verified, and do not verify them again.
    files_verified = set()
    for entry, _ in read_manifest(transfer_manifest_path):
        if not isinstance(entry, TransferComplete):
            continue

        files_verified.add(entry.name)

    files_to_verify = set()
    for fname in remote_files:
        if fname in files_to_transfer:
            continue

        if fname not in files_verified:
            files_to_verify.add(fname)

    files_to_transfer = sorted(files_to_transfer)
    files_to_verify = sorted(files_to_verify)

    if direction is TransferDirection.PULL:
        ensure_local_dirs_exist(local_prefix, files_to_transfer)

    transfer_cmd_info = make_cmd_info(direction, files_to_transfer,
                                      remote_prefix, local_prefix,
                                      transfer_manifest_path)
    verify_cmd_info = make_cmd_info(direction, files_to_verify, remote_prefix,
                                    local_prefix, transfer_manifest_path)

    write_cmd_info(transfer_cmd_info, Path(TRANSFER_COMMANDS_FILE_NAME))
    write_cmd_info(verify_cmd_info, Path(VERIFY_COMMANDS_FILE_NAME))

    dags.write_dag(
        make_inner_dag(
            direction=direction,
            requirements=requirements,
            transfer_cmd_info=transfer_cmd_info,
            verify_cmd_info=verify_cmd_info,
            unique_id=unique_id,
            test_mode=test_mode,
        ),
        dag_dir=Path.cwd(),  # this will be the working dir of the outer DAG
        dag_file_name=INNER_DAG_NAME,
    )

    bytes_to_transfer = sum(src_files[fname] for fname in files_to_transfer)
    bytes_to_verify = sum(src_files[fname] for fname in files_to_verify)

    with transfer_manifest_path.open(mode="a") as f:
        SyncRequest(
            direction=direction,
            remote_prefix=remote_prefix,
            files_at_source=len(src_files),
            files_to_transfer=len(files_to_transfer),
            bytes_to_transfer=bytes_to_transfer,
            files_to_verify=len(files_to_verify),
            bytes_to_verify=bytes_to_verify,
            timestamp=timestamp(),
        ).write_entry_to(f)

        for fname in files_to_transfer:
            TransferRequest(name=fname,
                            size=src_files[fname]).write_entry_to(f)

        for fname in files_to_verify:
            VerifyRequest(name=fname, size=src_files[fname]).write_entry_to(f)
Example #7
def write_inner_dag(
    source_prefix: Path,
    source_manifest: Path,
    dest_prefix: Path,
    requirements=None,
    test_mode: bool = False,
    unique_id=None,
):

    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    src_files = parse_manifest(source_prefix, source_manifest, "Source")

    generate_file_listing(dest_prefix, Path("destination_manifest.txt"))
    dest_files = parse_manifest(dest_prefix, Path("destination_manifest.txt"),
                                "Destination")

    files_to_xfer = set()
    for fname in src_files:
        if src_files[fname] != dest_files.get(fname, -1):
            files_to_xfer.add(fname)

    transfer_manifest_path = dest_prefix / "transfer_manifest.txt"
    transfer_manifest_path.parent.mkdir(parents=True, exist_ok=True)
    transfer_manifest_path.touch(exist_ok=True)

    # Check for files that we have already verified, and do not verify them again.
    files_verified = set()
    with transfer_manifest_path.open(mode="r") as f:
        for line in f:
            line = line.strip()

            if not line or line.startswith("#"):
                continue

            info = line.split()

            if info[0] != "TRANSFER_VERIFIED":
                continue

            info = json.loads(" ".join(info[1:]))
            if not valid_metadata(info):
                continue

            files_verified.add(info["name"])

    files_to_verify = set()
    for fname in src_files:
        if fname in files_to_xfer:
            continue

        if fname not in files_verified:
            files_to_verify.add(fname)

    ensure_destination_dirs_exist(dest_prefix, files_to_xfer)

    xfer_cmd_info = make_cmd_info(files_to_xfer, source_prefix, dest_prefix,
                                  transfer_manifest_path)
    verify_cmd_info = make_cmd_info(files_to_verify, source_prefix,
                                    dest_prefix, transfer_manifest_path)

    write_cmd_info(xfer_cmd_info, Path("xfer_commands.json"))
    write_cmd_info(verify_cmd_info, Path("verify_commands.json"))

    inner_dag = make_inner_dag(requirements, xfer_cmd_info, verify_cmd_info,
                               unique_id, test_mode)

    print(inner_dag.describe())

    dags.write_dag(inner_dag, dag_dir=Path.cwd(), dag_file_name="inner.dag")

    bytes_to_transfer = sum(src_files[fname] for fname in files_to_xfer)
    bytes_to_verify = sum(src_files[fname] for fname in files_to_verify)
    with transfer_manifest_path.open(mode="a") as f:
        f.write(
            "SYNC_REQUEST {} files_at_source={} files_to_transfer={} bytes_to_transfer={} files_to_verify={} bytes_to_verify={} timestamp={}\n"
            .format(source_prefix, len(src_files), len(files_to_xfer),
                    bytes_to_transfer, len(files_to_verify), bytes_to_verify,
                    time.time()))

        for fname in files_to_xfer:
            info = {"name": fname, "size": src_files[fname]}
            f.write("TRANSFER_REQUEST {}\n".format(json.dumps(info)))
        for fname in files_to_verify:
            info = {"name": fname, "size": src_files[fname]}
            f.write("VERIFY_REQUEST {}\n".format(json.dumps(info)))
Example #8
def test_config_file_has_right_contents(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    dags.write_dag(dag, dag_dir)

    assert ("DAGMAN_MAX_JOBS_IDLE = 10"
            in (dag_dir / dags.CONFIG_FILE_NAME).read_text().splitlines())
Example #9
def test_config_file_gets_written_if_config_given(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    dags.write_dag(dag, dag_dir)

    assert (dag_dir / dags.CONFIG_FILE_NAME).exists()
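Both of these tests rely on a dag_dir fixture supplied by the surrounding test suite. A minimal sketch of such a fixture, assuming pytest's built-in tmp_path, might be:

import pytest

@pytest.fixture
def dag_dir(tmp_path):
    # A fresh temporary directory for each test to write DAG files into.
    return tmp_path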
Example #10
)

# This is the "combine the counts from each chunk" step.
# Since it can't run until all the chunks are done, we create it as a child
# of the previous step.
# Its input files are all of the output files from the previous step, which
# is easy in this case because we know the naming scheme.
combine_counts = count_words.child_layer(
    name="combine_counts",
    submit_description=htcondor.Submit({
        "executable": "combine_counts.py",
        "transfer_input_files": ", ".join(
            f"counts_{n}.txt" for n in range(NUM_CHUNKS)
        ),
        "output": "combine_counts.out",
        "error": "combine_counts.err",
    }),
)

# We're done setting up the DAG, so we can write it out.
# The DAG input file itself as well as all of the submit descriptions will
# be written out to the specified directory.
# Here, we just write it out to the same directory that this file is in.
# If you write it out to a different directory, you may need to be careful
# about filepaths in your submit descriptions!
this_dir = Path(__file__).parent
dags.write_dag(diamond, this_dir)
print(f"Wrote DAG files to {this_dir}")
Example #11
    }),
    vars=[{
        "word_set": str(n)
    } for n in range(num_chunks)],
)

# This is the "combine the counts from each chunk" step.
combine_counts = count_words.child_layer(
    name="combine_counts",
    submit_description=htcondor.Submit({
        "executable": "combine_counts.py",
        "transfer_input_files": ", ".join(
            f"counts_{n}.txt" for n in range(num_chunks)
        ),
        "output": "combine_counts.out",
        "error": "combine_counts.err",
    }),
)

# We're done setting up the DAG, so we can write it out.
# The DAG input file itself as well as all of the submit descriptions will
# be written out to the specified directory.
# Here, we just write it out to the same directory that this file is in.
# If you write it out to a different directory, you may need to be careful
# about filepaths in your submit descriptions!
this_dir = Path(__file__).parent
dags.write_dag(analysis_dag, this_dir, dag_file_name="analysis.dag")
print(f"Wrote DAG files to {this_dir}")