def test_add_priority(self):
    fw1 = Firework([ScriptTask(script=None)], fw_id=-1)
    fw2 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-2)
    fw3 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-3)
    wf = Workflow([fw1, fw2, fw3])
    wf = add_priority(wf, 4, 8)
    self.assertEqual(wf.id_fw[-1].spec["_priority"], 4)
    self.assertEqual(wf.id_fw[-2].spec["_priority"], 8)
    self.assertEqual(wf.id_fw[-3].spec["_priority"], 8)
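# For reference: a minimal sketch of the behavior exercised above, assuming
# add_priority assigns root_priority to root Fireworks (those without parents)
# and child_priority to all others. The real powerup may differ in detail.
def add_priority(original_wf, root_priority, child_priority=None):
    child_priority = root_priority if child_priority is None else child_priority
    root_ids = original_wf.root_fw_ids
    for fw in original_wf.fws:
        fw.spec["_priority"] = root_priority if fw.fw_id in root_ids else child_priority
    return original_wf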
def get_test_fw(fworker, build_id=None, add_to_spec=None):
    spec = {"_fworker": fworker}
    if not build_id:
        build_id = "no_build"
    if add_to_spec:
        spec.update(add_to_spec)
    run_test = ScriptTask(script=RUN_TESTS_CMD)
    export_coverage = ScriptTask(script=EXPORT_COV_CMD)
    fw_test = Firework(
        [run_test, export_coverage],
        spec=spec,
        name="run tests ({})".format(build_id),
    )
    return fw_test
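# Hypothetical usage of the helper above, assuming LP is the module-level
# LaunchPad used elsewhere in this file:
test_fw = get_test_fw("local", build_id="smoke_check")
LP.add_wf(Workflow([test_fw]))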
def test_set_queue_adapter(self):
    # test fw_name_constraint
    fw1 = Firework([ScriptTask(script=None)], fw_id=-1, name="Firsttask")
    fw2 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-2, name="Secondtask")
    fw3 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-3, name="Thirdtask")
    wf = Workflow([fw1, fw2, fw3])
    wf = set_queue_adapter(wf, {"test": {"test": 1}}, fw_name_constraint="Secondtask")
    self.assertDictEqual(wf.id_fw[-1].spec, {})
    self.assertDictEqual(wf.id_fw[-2].spec, {"_queueadapter": {"test": {"test": 1}}})
    self.assertDictEqual(wf.id_fw[-3].spec, {})

    # test task_name_constraint
    fw1 = Firework([ScriptTask(script=None)], fw_id=-1, name="Firsttask")
    fw2 = Firework(
        [ScriptTask(script=None), ModifiedScriptTask(script=None)],
        parents=[fw1],
        fw_id=-2,
        name="Secondtask",
    )
    fw3 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-3, name="Thirdtask")
    wf = Workflow([fw1, fw2, fw3])
    wf = set_queue_adapter(wf, {"test": {"test": 1}}, task_name_constraint="ModifiedScriptTask")
    self.assertDictEqual(wf.id_fw[-1].spec, {})
    self.assertDictEqual(wf.id_fw[-2].spec, {"_queueadapter": {"test": {"test": 1}}})
    self.assertDictEqual(wf.id_fw[-3].spec, {})
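# For reference: a minimal sketch of the powerup under test, assuming
# set_queue_adapter merges the given dict into _queueadapter only for
# Fireworks whose name contains fw_name_constraint and which hold a task
# whose class name contains task_name_constraint. Details are assumptions.
def set_queue_adapter(original_wf, queueadapter=None,
                      fw_name_constraint=None, task_name_constraint=None):
    for fw in original_wf.fws:
        if fw_name_constraint and fw_name_constraint not in fw.name:
            continue
        if task_name_constraint and not any(
                task_name_constraint in type(t).__name__ for t in fw.tasks):
            continue
        fw.spec.setdefault("_queueadapter", {}).update(queueadapter or {})
    return original_wf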
def wf_evaluate_build(fworker, build_name, dataset_set, pipe_config,
                      include_tests=False, cache=True,
                      kfold_config=KFOLD_DEFAULT, tags=None):
    """
    Current fworkers:
    - "local": Alex's local computer
    - "cori": Cori
    - "lrc": Lawrencium
    """
    check_pipe_config(pipe_config)
    if fworker not in valid_fworkers:
        raise ValueError("fworker must be in {}".format(valid_fworkers))

    # Get a fun, unique id for this build
    word_file = "/usr/share/dict/words"
    words = open(word_file).read().splitlines()
    words_short = [w for w in words if 4 <= len(w) <= 6]
    build_id = None
    while not build_id or LP.db.automatminer_builds.find(
            {"build_id": build_id}).count() != 0:
        build_id = " ".join([w.lower() for w in random.sample(words_short, 2)])
    print("build id: {}".format(build_id))

    all_links = {}
    fws_fold0 = []
    fws_consolidate = []
    benchmark_hashes = []
    for benchmark in dataset_set:
        links, fw_fold0, fw_consolidate = wf_benchmark(
            fworker, pipe_config, **benchmark, tags=tags,
            kfold_config=kfold_config, cache=cache, return_fireworks=True,
            build_id=build_id, add_dataset_to_names=True)
        all_links.update(links)
        fws_fold0.extend(fw_fold0)
        fws_consolidate.append(fw_consolidate)
        # The benchmark hash is the same for all fws in one benchmark
        benchmark_hashes.append(fw_fold0[0].to_dict()["spec"]["benchmark_hash"])

    fw_build_merge = Firework(
        ConsolidateBenchmarksToBuild(),
        spec={"benchmark_hashes": benchmark_hashes,
              "build_id": build_id,
              "pipe_config": pipe_config,
              "build_name": build_name,
              "commit": get_last_commit(),
              "_fworker": fworker,
              "tags": tags},
        name="build merge ({})".format(build_id))

    for fw in fws_consolidate:
        all_links[fw] = [fw_build_merge]
    if include_tests:
        fw_test = Firework(ScriptTask(script=RUN_TESTS_CMD),
                           name="run tests ({})".format(build_id))
        all_links[fw_test] = fws_fold0
    all_links[fw_build_merge] = []

    wf_name = "build: {} ({}) [{}]".format(build_id, build_name, fworker)
    wf = Workflow(list(all_links.keys()), all_links, name=wf_name,
                  metadata={"build_id": build_id,
                            "tags": tags,
                            "benchmark_hashes": benchmark_hashes})
    return wf
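# Hypothetical invocation of wf_evaluate_build. Each dataset_set entry is a
# kwargs dict consumed by wf_benchmark, so the keys shown are placeholders,
# not the real schema:
build_wf = wf_evaluate_build(
    fworker="local",
    build_name="nightly",
    dataset_set=[{"name": "some_dataset", "target": "some_target"}],  # placeholder kwargs
    pipe_config={"preset": "default"},  # placeholder; must satisfy check_pipe_config
    include_tests=True,
)
LP.add_wf(build_wf)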
    )
    wf_fws.append(fw_fit_level_1)
    wf_links[fw_init_raw_data].append(fw_fit_level_1)

    # Unfit KB compression
    fw_raw_data_compression = None
    if COMPRESS_OUTPUT:
        fw_name = "ScriptTask_compression_raw_data"
        if VERBOSE_QUEUE:
            print("Queueing {}".format(fw_name))
        fw_raw_data_compression = Firework(
            ScriptTask(
                script="bzip2 -v " + os.path.join(KB_DIRECTORY, filename_raw_data)
            ),
            name=fw_name,
            spec={"_queueadapter": {"job_name": fw_name}, "_priority": 0},
        )
        wf_fws.append(fw_raw_data_compression)
        wf_links[fw_fit_level_1].append(fw_raw_data_compression)

    # Fit Level 1 KB compression
    fw_sim_data_1_compression = None
    if COMPRESS_OUTPUT:
        fw_name = "ScriptTask_compression_sim_data_1"
        if VERBOSE_QUEUE:
def main(sequencing_directory, library_prefix, num_libraries, raw_data_dir):
    lpad = LaunchPad(**yaml.safe_load(open("my_launchpad.yaml")))
    workflow_fireworks = []
    workflow_dependencies = collections.defaultdict(list)

    library_dirs = [
        os.path.join(sequencing_directory, library_prefix + str(i + 1))
        for i in range(num_libraries)
    ]
    subdirs = ['unzipped', 'trimmed', 'aligned', 'pythonized', 'sorted']

    for library_dir in library_dirs:
        seq_functions.make_directories(library_dir, subdirs)

        name = "Gunzip_%s" % os.path.basename(library_dir)
        fw_gunzip = Firework(
            [
                ScriptTask(script="find " + os.path.join(library_dir, raw_data_dir) +
                           " -name '*.gz' -print0 | xargs -0 gunzip"),
                ScriptTask(script="mv " + os.path.join(library_dir, raw_data_dir) +
                           "/*.fastq " + os.path.join(library_dir, "unzipped")),
            ],
            name=name,
            spec={"_queueadapter": {"job_name": name}},
        )
        workflow_fireworks.append(fw_gunzip)

        name = "Trim_%s" % os.path.basename(library_dir)
        fw_trim = Firework(
            [TrimTask(library_path=library_dir, unzipped_name="unzipped",
                      trimmed_name="trimmed")],
            name=name,
            spec={"_queueadapter": {"job_name": name}},
        )
        workflow_fireworks.append(fw_trim)
        workflow_dependencies[fw_gunzip].append(fw_trim)

        name = "Align_%s" % os.path.basename(library_dir)
        fw_align = Firework(
            [AlignTask(library_path=library_dir, trimmed_name="trimmed",
                       aligned_name="aligned")],
            name=name,
            spec={"_queueadapter": {"job_name": name}},
        )
        workflow_fireworks.append(fw_align)
        workflow_dependencies[fw_trim].append(fw_align)

        name = "Sort_%s" % os.path.basename(library_dir)
        fw_sort = Firework(
            [SortTask(library_path=library_dir, aligned_name="aligned",
                      bammed_name="bammed", sorted_name="sorted")],
            name=name,
            spec={"_queueadapter": {"job_name": name}},
        )
        workflow_fireworks.append(fw_sort)
        workflow_dependencies[fw_align].append(fw_sort)

        name = "Count_%s" % os.path.basename(library_dir)
        fw_count = Firework(
            [CountTask(library_path=library_dir, aligned_name="aligned",
                       bammed_name="bammed", counted_name="counted")],
            name=name,
            spec={"_queueadapter": {"job_name": name}},
        )
        workflow_fireworks.append(fw_count)
        workflow_dependencies[fw_sort].append(fw_count)

    lpad.add_wf(Workflow(workflow_fireworks, links_dict=workflow_dependencies))
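# Hypothetical CLI wiring for main() above; the argument names simply mirror
# its parameters:
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("sequencing_directory")
    parser.add_argument("library_prefix")
    parser.add_argument("num_libraries", type=int)
    parser.add_argument("raw_data_dir")
    args = parser.parse_args()
    main(args.sequencing_directory, args.library_prefix,
         args.num_libraries, args.raw_data_dir)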
def make_md_workflow(sim, archive, stages, md_engine='gromacs',
                     md_category='md', local_category='local',
                     postrun_wf=None, post_wf=None, files=None):
    """Construct a general, single MD simulation workflow.

    Assumptions
    -----------
    Queue launcher submission script must define and export the following
    environment variables:

        1. STAGING : absolute path on resource to staging directory
        2. SCRATCHDIR : absolute path on resource to scratch directory

    The staging directory must already exist on all resources specified in
    ``stages``.

    The script ``run_md.sh`` must be somewhere on your path, and must take
    a single argument giving the directory to execute MD out of. It should
    create and change the working directory to that directory before
    anything else.

    Parameters
    ----------
    sim : str
        MDSynthesis Sim.
    archive : str
        Absolute path to directory to launch from, which holds all
        required files for running MD.
    stages : list, str
        Dicts giving for each of the following keys:
            - 'server': server host to transfer to
            - 'user': username to authenticate with
            - 'staging': absolute path to staging area on remote resource
        alternatively, a path to a yaml file giving a list of dicts with
        the same information.
    md_engine : {'gromacs'}
        MD engine name; needed to determine continuation mechanism to use.
    md_category : str
        Category to use for the MD Firework. Used to target the correct
        rockets.
    local_category : str
        Category to use for non-MD Fireworks, which should be run by
        rockets where the ``archive`` directory is accessible.
    postrun_wf : Workflow
        Workflow to perform after each copyback; performed in parallel to
        continuation run.
    post_wf : Workflow
        Workflow to perform after completed MD (no continuation); use for
        final postprocessing.
    files : list
        Names of files (not paths) needed for each leg of the simulation.
        Need not exist, but if they do they will get staged before each
        run.

    Returns
    -------
    workflow
        MD workflow; can be submitted to LaunchPad of choice.

    """
    sim = mds.Sim(sim)

    #TODO: perhaps move to its own FireTask?
    sim.categories['md_status'] = 'running'

    # tolerate files=None (the default) so the existence check below is safe
    files = files or []

    #TODO: the trouble with this is that if this workflow is created with
    # the intent of being attached to another, these files may not exist
    # at all yet
    f_exist = [f for f in files if os.path.exists(os.path.join(archive, f))]

    if isinstance(stages, string_types):
        with open(stages, 'r') as f:
            stages = yaml.safe_load(f)

    ## Stage files on all resources where MD may run; takes place locally
    fts_stage = list()
    for stage in stages:
        fts_stage.append(
            FileTransferTask(mode='rtransfer',
                             server=stage['server'],
                             user=stage['user'],
                             files=[os.path.join(archive, i) for i in files],
                             dest=os.path.join(stage['staging'], sim.uuid),
                             max_retry=5,
                             shell_interpret=True))

    fw_stage = Firework(fts_stage,
                        spec={'_launch_dir': archive,
                              '_category': local_category},
                        name='staging')

    ## MD execution; takes place in queue context of compute resource
    # make rundir
    ft_mkdir = MkRunDirTask(uuid=sim.uuid)

    # copy input files to scratch space
    ft_copy = FileTransferTask(
        mode='copy',
        files=[os.path.join('${STAGING}/', sim.uuid, i) for i in files],
        dest=os.path.join('${SCRATCHDIR}/', sim.uuid),
        ignore_missing=True,
        shell_interpret=True)

    # next, run MD
    ft_md = ScriptTask(script='run_md.sh {}'.format(
                           os.path.join('${SCRATCHDIR}/', sim.uuid)),
                       use_shell=True,
                       fizzle_bad_rc=True)

    # send info on where files live to pull firework
    ft_info = BeaconTask(uuid=sim.uuid)

    fw_md = Firework([ft_mkdir, ft_copy, ft_md, ft_info],
                     spec={'_category': md_category},
                     name='md')

    ## Pull files back to archive; takes place locally
    ft_copyback = FilePullTask(dest=archive)

    fw_copyback = Firework([ft_copyback],
                           spec={'_launch_dir': archive,
                                 '_category': local_category},
                           name='pull')

    ## Decide if we need to continue and submit new workflow if so;
    ## takes place locally
    if md_engine == 'gromacs':
        ft_continue = GromacsContinueTask(sim=sim,
                                          archive=archive,
                                          stages=stages,
                                          md_engine=md_engine,
                                          md_category=md_category,
                                          local_category=local_category,
                                          postrun_wf=postrun_wf,
                                          post_wf=post_wf,
                                          files=files)
    else:
        raise ValueError("No known md engine `{}`.".format(md_engine))

    fw_continue = Firework([ft_continue],
                           spec={'_launch_dir': archive,
                                 '_category': local_category},
                           name='continue')

    wf = Workflow([fw_stage, fw_md, fw_copyback, fw_continue],
                  links_dict={fw_stage: [fw_md],
                              fw_md: [fw_copyback],
                              fw_copyback: [fw_continue]},
                  name='{} | md'.format(sim.name),
                  metadata=dict(sim.categories))

    ## Mix in postrun workflow, if given
    if postrun_wf:
        if isinstance(postrun_wf, dict):
            postrun_wf = Workflow.from_dict(postrun_wf)

        wf.append_wf(Workflow.from_wflow(postrun_wf), [fw_copyback.fw_id])

    return wf
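# Hypothetical usage of make_md_workflow, assuming a stages.yml listing the
# transfer targets described in the docstring and the usual my_launchpad.yaml:
from fireworks import LaunchPad

lp = LaunchPad.from_file("my_launchpad.yaml")
wf = make_md_workflow(sim="path/to/sim",
                      archive="/absolute/path/to/archive",
                      stages="stages.yml",
                      files=["md.tpr", "md.cpt"])
lp.add_wf(wf)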