Example #1
def newdep(args):
    from balsam import setup
    setup()
    # from balsam.core import models
    from balsam.launcher import dag

    parent = match_uniq_job(args.parent)
    child = match_uniq_job(args.child)
    dag.add_dependency(parent, child)
    print(f"Created link {parent.cute_id} --> {child.cute_id}")
Example #2
def mkchild(args):
    from balsam import setup
    setup()
    from balsam.launcher import dag

    if not dag.current_job:
        raise RuntimeError("mkchild requires that BALSAM_JOB_ID is in the environment")
    child_job = newjob(args)
    dag.add_dependency(dag.current_job, child_job)
    print(f"Created link {dag.current_job.cute_id} --> {child_job.cute_id}")
Example #3
                                 description="joining final outputfiles",
                                 num_nodes=1,
                                 ranks_per_node=1,
                                 node_packing_count=node_pack_count,
                                 args=join_args,
                                 wall_time_minutes=10,
                                 application="join_art_rootfiles")

    for ievent in range(nevents):
        beamon_args = f"{_file} {ievent}"

        beamon_job = dag.add_job(name=f"beamon_{i}_{ievent}",
                                 workflow=workflow_main,
                                 description="uboone full beam on chain",
                                 num_nodes=1,
                                 ranks_per_node=1,
                                 node_packing_count=node_pack_count,
                                 args=beamon_args,
                                 wall_time_minutes=80,
                                 application="beamon_chain_run1")

        # add_dependency(parent, child)
        dag.add_dependency(timestamp_job, beamon_job)
        dag.add_dependency(beamon_job, mergeFinal_job)

print("Total number of events to be processed: ", tot_events)
print("Total number of files to be processed: ", files_processed)
print(workflow_timestamp)
print(workflow_main)
print(workflow_join)
Example #4
for py_script in Path(path_to_ts_estimate).glob('**/*.py'):
    job_dir, script_name = os.path.split(str(py_script))
    job_to_add = BalsamJob(
        name=script_name,
        workflow=workflow_name,
        application='python',
        args=str(py_script),
        input_files='',
        ranks_per_node=1,
        threads_per_rank=balsam_exe_settings['threads_per_rank'],
        node_packing_count={node_packing_count},
        user_workdir=job_dir,
    )
    job_to_add.save()

    # for menten
    # ranks_per_node=1,
    # node_packing_count={node_packing_count},
    # threads_per_rank not specified

    # all job_to_add jobs are children of the 01 job, as given by jobs_to_be_finished
    # nested for loop because BalsamJob.objects.filter(name=dep_job) returns a
    # django QuerySet for a single dep_job, e.g. (H_00_relax.py)
    # no nested loop is required if workflow__contains=dependent_workflow_name
    # is used instead (see the sketch at the end of this example)
    for job in pending_simulations:
        for sub_job in job:
            add_dependency(sub_job, job_to_add)  # parent, child
    # do not run 03 until all 02 for a given reaction are done
    for job in pending_simulations_dep:
        add_dependency(job_to_add, job)  # parent, child
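
A minimal sketch of the single-query alternative mentioned in the comment above: instead of looping over per-name QuerySets, the parent jobs could be selected once by workflow name, following the workflow__contains pattern that also appears in Example #10. Here dependent_workflow_name is a hypothetical stand-in for whatever workflow string the parent jobs share.

    # Assumption: the unfinished parent jobs all carry a workflow name that
    # contains dependent_workflow_name (hypothetical variable).
    parent_jobs = BalsamJob.objects.filter(
        workflow__contains=dependent_workflow_name).exclude(state="JOB_FINISHED")
    for parent_job in parent_jobs:
        add_dependency(parent_job, job_to_add)  # parent, child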
Example #5
    app_path = os.path.join(work_dir, job)
    app_desc = 'Run ' + app_name
    run_line = sys.executable + ' ' + app_path
    add_app(app_name, run_line, app_desc)

    job_name = 'job_' + app_name
    dag.add_job(name=job_name,
                workflow="libe_workflow",
                application=app_name,
                num_nodes=num_nodes,
                ranks_per_node=ranks_per_node,
                stage_out_url="local:" + work_dir,
                stage_out_files=job_name + ".out")

    # Add dependency between jobs so they run one at a time.
    if prev_job_name:
        BalsamJob = dag.BalsamJob
        parent = BalsamJob.objects.get(name=prev_job_name)
        child = BalsamJob.objects.get(name=job_name)
        dag.add_dependency(parent, child)

    prev_job_name = job_name

# TODO: check how to do this via the API (see the sketch at the end of this example) - until then, use the CLI
run_cmd("balsam ls apps", True)
run_cmd("balsam ls jobs", True)

print("")
run_cmd("echo -e To launch jobs run: balsam launcher --consume-all")
print("")
Example #6
        reco2_job = dag.add_job(name=f"reco2_{i}_{ievent}",
                                workflow=workflow,
                                description="uboone testing reco2",
                                num_nodes=1,
                                ranks_per_node=1,
                                node_packing_count=node_pack_count,
                                args=reco2_args,
                                wall_time_minutes=50,
                                application="uboonecode_v08_00_00_27")

        reco2_post_job = dag.add_job(name=f"reco2_post_{i}_{ievent}",
                                     workflow=workflow,
                                     description="uboone testing reco2 post",
                                     num_nodes=1,
                                     ranks_per_node=1,
                                     node_packing_count=node_pack_count,
                                     args=reco2_post_args,
                                     wall_time_minutes=50,
                                     application="uboonecode_v08_00_00_27")

        # add_dependency(parent, child)
        dag.add_dependency(reco1_job, celltree_job)
        dag.add_dependency(celltree_job, larcv_job)
        dag.add_dependency(larcv_job, reco1a_job)
        dag.add_dependency(reco1a_job, reco2_job)
        dag.add_dependency(reco2_job, reco2_post_job)
        dag.add_dependency(reco2_post_job, mergeFinal_job)

print("Total number of events to be processed: ", tot_events)
Example #7
        description="joining final outputfiles",
        num_nodes=1,
        ranks_per_node=1,
        node_packing_count=node_pack_count,
        args=join_args,
        wall_time_minutes=1,
        application="join_art_rootfiles_preproc"
    )
    
    for ievent in range(nevents):
        beamoff_args = f"{_file} {ievent} {ts_string}"

        beamoff_job = dag.add_job(
            name=f"beamoff_{i}_{ievent}",
            workflow=workflow,
            description="uboone full beam off chain",
            num_nodes=1,
            ranks_per_node=1,
            node_packing_count=node_pack_count,
            args=beamoff_args,
            wall_time_minutes=50,
            application="uboonecode_beamoff_chain_run1"
        )

        # add_dependency(parent, child)
        dag.add_dependency(beamoff_job, mergeFinal_job)


print("Total number of events to be processed: ", tot_events)
print(workflow)
Example #8
    for ievent in range(nevents):
        v01b_args = f"{_file} {ievent}"
        v27_args = f" "

        v01b_job = dag.add_job(
            name=f"v01b_{i}_{ievent}",
            workflow=workflow,
            description="uboone testing v08_00_00_01b chain",
            num_nodes=1,
            ranks_per_node=1,
            node_packing_count=node_pack_count,
            args=v01b_args,
            wall_time_minutes=50,
            application="uboonecode_v08_00_00_01b_combined")

        v27_job = dag.add_job(name=f"v27_{i}_{ievent}",
                              workflow=workflow,
                              description="uboone testing v08_00_00_27",
                              num_nodes=1,
                              ranks_per_node=1,
                              node_packing_count=node_pack_count,
                              args=v27_args,
                              wall_time_minutes=50,
                              application="uboonecode_v08_00_00_27_combined")

        # add_dependency(parent, child)
        dag.add_dependency(v01b_job, v27_job)
        dag.add_dependency(v27_job, mergeFinal_job)

print("Total number of events to be processed: ", tot_events)
Example #9
def mock_addjobs():
    job1 = dag.add_job(name="added1")
    job2 = dag.add_job(name="added2")
    job3 = dag.add_job(name="added3")
    dag.add_dependency(parent=job2, child=job3)
Example #10
                input_files='',
                user_workdir=job_dir,
                node_packing_count={node_packing_count},
                ranks_per_node=1,
            )
            job_to_add.save()

        # for a given rxn_name, get all BalsamJob objects that it depends on
        dependancy.append(
            BalsamJob.objects.filter(name=py_script).exclude(
                state="JOB_FINISHED"))

    # add dependencies
    for job in pending_simulations_dep_1:
        for adding_job in dependancy:
            # handle double species like O2_O+O
            try:
                add_dependency(adding_job, job)  # parent, child
            except RuntimeError:
                pass
# ads_vib job dependencies
dependent_workflow_name_2 = facetpath + '_vib'
pending_simulations_dep_2 = BalsamJob.objects.filter(
    workflow__contains=dependent_workflow_name_2).exclude(state="JOB_FINISHED")

for pending_job in pending_simulations_dep_2:
    for submitted_job in all_submitted_jobs:
        balsam_submitted_job = BalsamJob.objects.filter(
            name=submitted_job).exclude(state="JOB_FINISHED")
        add_dependency(balsam_submitted_job, pending_job)  # parent, child
Example #11
r_grid = np.linspace(0.8, 1.3)

water_scan = []
for i, r in enumerate(r_grid):
    job = BalsamJob(
        name=f"task{i}",
        workflow="demo",
        description=f"r = {r:.3f}",
        application="nwchem-water",
        args="input.nw",
        num_nodes=1,
        ranks_per_node=64,
        cpu_affinity="depth",
        data={
            'r': r,
            'theta': 104.5
        },
    )
    water_scan.append(job)
    job.save()

plotjob = BalsamJob(
    name="plot",
    application="plot-pes",
    workflow="demo",
    input_files="",
)
plotjob.save()
for job in water_scan:
    add_dependency(parent=job, child=plotjob)
            description="uboone reco1 beam off chain",
            num_nodes=1,
            ranks_per_node=1,
            node_packing_count=node_pack_count,
            args=beamoff_reco1_args,
            wall_time_minutes=80,
            application="beamoff_chain_run1_reco1")

        beamoff_reco2_job = dag.add_job(
            name=f"beamoff_reco2_{i}_{ievent}",
            workflow=workflow_main_reco2,
            description="uboone reco2 beamoff on chain",
            num_nodes=1,
            ranks_per_node=1,
            node_packing_count=node_pack_count,
            args=beamoff_reco2_args,
            wall_time_minutes=80,
            application="beamoff_chain_run1_reco2")

        # add_dependency(parent, child)
        dag.add_dependency(timestamp_job, beamoff_reco1_job)
        dag.add_dependency(beamoff_reco1_job, beamoff_reco2_job)
        dag.add_dependency(beamoff_reco2_job, mergeFinal_job)

print("Total number of events to be processed: ", tot_events)
print("Total number of files to be processed: ", files_processed)
print(workflow_timestamp)
print(workflow_main_reco1)
print(workflow_main_reco2)
print(workflow_join)
Example #13
def postprocess_training():

    # First, get this current job:
    current_job = dag.current_job

    # Let's parse the arguments to get the logdir:
    args, unknown = generic_parser()

    print(args)
    print(unknown)

    # We should be able to see the log dir.
    print("Attempting to scrape tensorboard information from {}".format(
        args.log_directory))

    train_steps, train_loss, test_steps, test_loss = tabulate_events(
        args.log_directory)

    value = quantify_overtraining(minibatch_size=args.minibatch_size,
                                  train_loss=train_loss,
                                  test_loss=test_loss,
                                  train_steps=train_steps,
                                  test_steps=test_steps)

    if "2D" in dag.current_job.application:
        dimension = "2D"
    else:
        dimension = "3D"

    print(dimension)

    if value == 0:

        print("Should spawn another identical job")

        next_job = spawn_training_job(
            num_nodes=dag.current_job.num_nodes,
            wall_time_minutes=dag.current_job.wall_time_minutes,
            name=dag.current_job.name + "C",
            workflow=dag.current_job.workflow,
            dimension=dimension,
            args=dag.current_job.args)

        dag.add_dependency(dag.current_job, next_job)

    elif value == -1:
        print("Aborting this path")

    # elif value == 1:

    #     args.learning_rate = str(float(args.learning_rate)*0.5)

    #     new_args = " ".join(unknown)
    #     for key in args:
    #         new_args += "--{key} {value}".format(key=key, value=args[key])

    #     print("Should spawn another job, lower LR")

    #     next_job = spawn_training_job(
    #         num_nodes = dag.current_job.num_nodes,
    #         wall_time_minutes = dag.current_job.wall_time_minutes,
    #         name = dag.current_job.name + "-",
    #         workflow = dag.current_job.workflow,
    #         dimension = dimension,
    #         args=new_args
    #     )

    #     dag.add_dependency(dag.current_job, next_job)

    #     print("Decrease learning rate")
    elif value == 2:

        # Inference files:
        inference_files = [
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_1_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_2_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_3_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_4_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_5_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_6_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_7_of_8.root",
            "/lus/theta-fs0/projects/datascience/cadams/wire_pixel_preprocessed_files_split/val_event_id_8_of_8.root",
        ]

        for i, _file in enumerate(inference_files):
            basename = os.path.basename(_file)
            basename = basename.replace('.root', '_out.root')
            out_file = args.checkpoint_directory + basename

            args.file = _file
            args.minibatch_size = 1
            args.iterations = 10

            new_args = " ".join(unknown)
            for key in vars(args):
                new_args += "--{key} {value} ".format(key=key.replace(
                    "_", "-"),
                                                      value=getattr(args, key))

            # Add the output file
            new_args += "--output-file {}".format(out_file)

            print(new_args)

            next_job = spawn_inference_job(
                num_nodes=dag.current_job.num_nodes,
                wall_time_minutes=dag.current_job.wall_time_minutes,
                name=dag.current_job.name + "I{}".format(i),
                workflow=dag.current_job.workflow,
                dimension=dimension,
                args=new_args)

            dag.add_dependency(dag.current_job, next_job)

            break

        print("Spawn inference jobs")
Example #14
    else: 
        print("make_sides_post recognized timeout flag")

    num_sides = int(os.environ['BALSAM_FT_NUM_SIDES'])
    num_files = len(glob.glob("side*.dat"))

    if num_files == num_sides:
        print("it's okay, the job was actually done")
        current_job.update_state("JOB_FINISHED", "handled error; it was okay")
        exit(0)
    elif num_files == 0:
        print("Creating rescue job")
        children = current_job.get_children()
        rescue = dag.spawn_child(clone=True, application_args="--sleep 0 --retcode 0")
        rescue.set_parents([])
        current_job.update_state("JOB_FINISHED", f"spawned rescue job {rescue.cute_id}")
        for child in children:
            child.set_parents([rescue])
        exit(0)

if '--dynamic-spawn' not in sys.argv:
    sys.exit(0)

reduce_job = current_job.get_child_by_name('sum_squares')

for i, sidefile in enumerate(glob.glob("side*.dat")):
    square_job = dag.spawn_child(name=f"square{i}", application="square",
                    application_args=sidefile, input_files=sidefile)
    dag.add_dependency(parent=square_job, child=reduce_job)
    print(f"spawned square{i} job")