Example #1
    def test_add_priority(self):
        fw1 = Firework([ScriptTask(script=None)], fw_id=-1)
        fw2 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-2)
        fw3 = Firework([ScriptTask(script=None)], parents=[fw1], fw_id=-3)

        wf = Workflow([fw1, fw2, fw3])

        wf = add_priority(wf, 4, 8)
        self.assertEqual(wf.id_fw[-1].spec["_priority"], 4)
        self.assertEqual(wf.id_fw[-2].spec["_priority"], 8)
        self.assertEqual(wf.id_fw[-3].spec["_priority"], 8)
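For context, `add_priority` is a workflow powerup that stamps a `_priority` key into each Firework's spec, with one value for root Fireworks and another for their children. A minimal sketch of such a powerup, assuming FireWorks' `Workflow.root_fw_ids` and `Workflow.fws` accessors (not necessarily the implementation under test):

def add_priority(original_wf, root_priority, child_priority=None):
    """Set _priority on every FW spec: roots get root_priority, all
    other FWs get child_priority (defaults to root_priority)."""
    child_priority = child_priority if child_priority is not None else root_priority
    root_ids = original_wf.root_fw_ids
    for fw in original_wf.fws:
        fw.spec["_priority"] = root_priority if fw.fw_id in root_ids else child_priority
    return original_wf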
Example #2
def get_test_fw(fworker, build_id=None, add_to_spec=None):
    spec = {"_fworker": fworker}

    if not build_id:
        build_id = "no_build"

    if add_to_spec:
        spec.update(add_to_spec)

    run_test = ScriptTask(script=RUN_TESTS_CMD)
    export_coverage = ScriptTask(script=EXPORT_COV_CMD)
    fw_test = Firework(
        [run_test, export_coverage],
        spec=spec,
        name="run tests ({})".format(build_id)
    )
    return fw_test
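A hypothetical usage of `get_test_fw`: wrap the returned Firework in a one-Firework Workflow and submit it to the default LaunchPad (assumes a configured my_launchpad.yaml; the build id below is a placeholder):

from fireworks import LaunchPad, Workflow

lpad = LaunchPad.auto_load()  # reads the launchpad config from the usual locations
fw = get_test_fw("local", build_id="smoke", add_to_spec={"_priority": 2})
lpad.add_wf(Workflow([fw], name=fw.name))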
Example #3
    def test_set_queue_adapter(self):
        # test fw_name_constraint
        fw1 = Firework([ScriptTask(script=None)], fw_id=-1, name="Firsttask")
        fw2 = Firework([ScriptTask(script=None)],
                       parents=[fw1],
                       fw_id=-2,
                       name="Secondtask")
        fw3 = Firework([ScriptTask(script=None)],
                       parents=[fw1],
                       fw_id=-3,
                       name="Thirdtask")

        wf = Workflow([fw1, fw2, fw3])
        wf = set_queue_adapter(wf, {"test": {
            "test": 1
        }},
                               fw_name_constraint="Secondtask")
        self.assertDictEqual(wf.id_fw[-1].spec, {})
        self.assertDictEqual(wf.id_fw[-2].spec,
                             {"_queueadapter": {
                                 "test": {
                                     "test": 1
                                 }
                             }})
        self.assertDictEqual(wf.id_fw[-3].spec, {})

        # test task_name_constraint
        fw1 = Firework([ScriptTask(script=None)], fw_id=-1, name="Firsttask")
        fw2 = Firework(
            [ScriptTask(script=None),
             ModifiedScriptTask(script=None)],
            parents=[fw1],
            fw_id=-2,
            name="Secondtask",
        )
        fw3 = Firework([ScriptTask(script=None)],
                       parents=[fw1],
                       fw_id=-3,
                       name="Thirdtask")

        wf = Workflow([fw1, fw2, fw3])
        wf = set_queue_adapter(wf, {"test": {
            "test": 1
        }},
                               task_name_constraint="ModifiedScriptTask")
        self.assertDictEqual(wf.id_fw[-1].spec, {})
        self.assertDictEqual(wf.id_fw[-2].spec,
                             {"_queueadapter": {
                                 "test": {
                                     "test": 1
                                 }
                             }})
        self.assertDictEqual(wf.id_fw[-3].spec, {})
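The powerup under test filters Fireworks by name and by task class before merging in `_queueadapter` settings. A minimal sketch of that filtering logic, offered as an assumption rather than the tested implementation:

def set_queue_adapter(original_wf, queueadapter=None,
                      fw_name_constraint=None, task_name_constraint=None):
    """Merge queueadapter settings into each FW whose name contains
    fw_name_constraint and that holds a task matching task_name_constraint."""
    for fw in original_wf.fws:
        if fw_name_constraint is not None and fw_name_constraint not in fw.name:
            continue
        if task_name_constraint is not None and not any(
                task_name_constraint in str(task) for task in fw.tasks):
            continue
        fw.spec.setdefault("_queueadapter", {}).update(queueadapter or {})
    return original_wf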
Example #4
def wf_evaluate_build(fworker,
                      build_name,
                      dataset_set,
                      pipe_config,
                      include_tests=False,
                      cache=True,
                      kfold_config=KFOLD_DEFAULT,
                      tags=None):
    """
    Current fworkers:
    - "local": Alex's local computer
    - "cori": Cori
    - "lrc": Lawrencium
    """
    check_pipe_config(pipe_config)
    if fworker not in valid_fworkers:
        raise ValueError("fworker must be in {}".format(valid_fworkers))

    # Get a fun unique id for this build
    word_file = "/usr/share/dict/words"
    with open(word_file) as f:
        words = f.read().splitlines()
    words_short = [w for w in words if 4 <= len(w) <= 6]

    build_id = None
    while LP.db.automatminer_builds.find({
            "build_id": build_id
    }).count() != 0 or not build_id:
        build_id = " ".join([w.lower() for w in random.sample(words_short, 2)])
    print("build id: {}".format(build_id))

    all_links = {}
    fws_fold0 = []
    fws_consolidate = []
    benchmark_hashes = []
    for benchmark in dataset_set:
        links, fw_fold0, fw_consolidate = wf_benchmark(
            fworker,
            pipe_config,
            **benchmark,
            tags=tags,
            kfold_config=kfold_config,
            cache=cache,
            return_fireworks=True,
            build_id=build_id,
            add_dataset_to_names=True)
        all_links.update(links)
        fws_fold0.extend(fw_fold0)
        fws_consolidate.append(fw_consolidate)
        # the benchmark hash is the same for all fws in one benchmark
        benchmark_hashes.append(
            fw_fold0[0].to_dict()["spec"]["benchmark_hash"])

    fw_build_merge = Firework(ConsolidateBenchmarksToBuild(),
                              spec={
                                  "benchmark_hashes": benchmark_hashes,
                                  "build_id": build_id,
                                  "pipe_config": pipe_config,
                                  "build_name": build_name,
                                  "commit": get_last_commit(),
                                  "_fworker": fworker,
                                  "tags": tags
                              },
                              name="build merge ({})".format(build_id))

    for fw in fws_consolidate:
        all_links[fw] = [fw_build_merge]

    if include_tests:
        fw_test = Firework(ScriptTask(script=RUN_TESTS_CMD),
                           name="run tests ({})".format(build_id))
        all_links[fw_test] = fws_fold0
    all_links[fw_build_merge] = []

    wf_name = "build: {} ({}) [{}]".format(build_id, build_name, fworker)
    wf = Workflow(list(all_links.keys()),
                  all_links,
                  name=wf_name,
                  metadata={
                      "build_id": build_id,
                      "tags": tags,
                      "benchmark_hashes": benchmark_hashes
                  })
    return wf
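One portability note: the build-id loop above calls `.count()` on a pymongo cursor, which was removed in pymongo 4. An equivalent uniqueness check with `Collection.count_documents` (assuming `LP.db` is a pymongo Database) would be:

build_id = None
while not build_id or LP.db.automatminer_builds.count_documents(
        {"build_id": build_id}) != 0:
    build_id = " ".join(w.lower() for w in random.sample(words_short, 2))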
Example #5
	)

wf_fws.append(fw_fit_level_1)
wf_links[fw_init_raw_data].append(fw_fit_level_1)

# Unfit KB compression
fw_raw_data_compression = None
if COMPRESS_OUTPUT:
	fw_name = "ScriptTask_compression_raw_data"

	if VERBOSE_QUEUE:
		print "Queueing {}".format(fw_name)

	fw_raw_data_compression = Firework(
		ScriptTask(
			script = "bzip2 -v " + os.path.join(KB_DIRECTORY, filename_raw_data)
			),
		name = fw_name,
		spec = {"_queueadapter": {"job_name": fw_name}, "_priority":0}
		)

	wf_fws.append(fw_raw_data_compression)
	wf_links[fw_fit_level_1].append(fw_raw_data_compression)

# Fit Level 1 KB compression

fw_sim_data_1_compression = None
if COMPRESS_OUTPUT:
	fw_name = "ScriptTask_compression_sim_data_1"

	if VERBOSE_QUEUE:
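The excerpt above is truncated, but the recurring pattern is clear: each compression step is a one-task Firework linked as a child of the step that produced the file. A compact Python 3 sketch of that pattern (the helper name and arguments are hypothetical):

import os
from fireworks import Firework, ScriptTask

def add_compression_fw(parent_fw, directory, filename, wf_fws, wf_links):
    """Append a bzip2 Firework and register it as a child of parent_fw."""
    fw_name = "ScriptTask_compression_" + filename
    fw = Firework(
        ScriptTask(script="bzip2 -v " + os.path.join(directory, filename)),
        name=fw_name,
        spec={"_queueadapter": {"job_name": fw_name}, "_priority": 0},
    )
    wf_fws.append(fw)
    wf_links.setdefault(parent_fw, []).append(fw)
    return fw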
Example #6
def main(sequencing_directory, library_prefix, num_libraries, raw_data_dir):
    lpad = LaunchPad(**yaml.safe_load(open("my_launchpad.yaml")))
    workflow_fireworks = []
    workflow_dependencies = collections.defaultdict(list)

    library_dirs = [
        os.path.join(sequencing_directory, library_prefix + str(i + 1))
        for i in range(num_libraries)
    ]
    subdirs = ['unzipped', 'trimmed', 'aligned', 'pythonized', 'sorted']

    for library_dir in library_dirs:
        seq_functions.make_directories(library_dir, subdirs)

        name = "Gunzip_%s" % os.path.basename(library_dir)
        fw_gunzip = Firework(
            [
                ScriptTask(script="find " +
                           os.path.join(library_dir, raw_data_dir) +
                           " -name '*.gz' -print0 | xargs -0 gunzip"),
                ScriptTask(
                    script="mv " + os.path.join(library_dir, raw_data_dir) +
                    "/*.fastq " + os.path.join(library_dir, "unzipped")),
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_gunzip)

        name = "Trim_%s" % os.path.basename(library_dir)
        fw_trim = Firework(
            [
                TrimTask(library_path=library_dir,
                         unzipped_name="unzipped",
                         trimmed_name="trimmed")
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_trim)
        workflow_dependencies[fw_gunzip].append(fw_trim)

        name = "Align_%s" % os.path.basename(library_dir)
        fw_align = Firework(
            [
                AlignTask(library_path=library_dir,
                          trimmed_name="trimmed",
                          aligned_name="aligned")
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_align)
        workflow_dependencies[fw_trim].append(fw_align)

        name = "Sort_%s" % os.path.basename(library_dir)
        fw_sort = Firework(
            [
                SortTask(library_path=library_dir,
                         aligned_name="aligned",
                         bammed_name="bammed",
                         sorted_name="sorted")
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_sort)
        workflow_dependencies[fw_align].append(fw_sort)

        name = "Count_%s" % os.path.basename(library_dir)
        fw_count = Firework(
            [
                CountTask(library_path=library_dir,
                          aligned_name="aligned",
                          bammed_name="bammed",
                          counted_name="counted")
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_count)
        workflow_dependencies[fw_sort].append(fw_count)

    lpad.add_wf(Workflow(workflow_fireworks, links_dict=workflow_dependencies))
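Each Firework here targets a queue through `_queueadapter`, so in production the workflow would be drained by a queue launcher. For a quick local test (an assumption, not part of this example), the rocket launcher can run the Fireworks serially:

import yaml
from fireworks import FWorker, LaunchPad
from fireworks.core.rocket_launcher import rapidfire

lpad = LaunchPad(**yaml.safe_load(open("my_launchpad.yaml")))
rapidfire(lpad, FWorker())  # pulls and runs READY Fireworks one after another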
Example #7
def make_md_workflow(sim,
                     archive,
                     stages,
                     md_engine='gromacs',
                     md_category='md',
                     local_category='local',
                     postrun_wf=None,
                     post_wf=None,
                     files=None):
    """Construct a general, single MD simulation workflow.

    Assumptions
    -----------
    Queue launcher submission script must define and export the following
    environment variables:

        1. STAGING : absolute path on resource to staging directory
        2. SCRATCHDIR : absolute path on resource to scratch directory

    The staging directory must already exist on all resources specified in
    ``stages``.

    The script ``run_md.sh`` must be somewhere on your path, and must take
    a single argument giving the directory to execute MD out of. It should
    create and change the working directory to that directory before anything
    else.

    Parameters
    ----------
    sim : str
        MDSynthesis Sim.
    archive : str
        Absolute path to directory to launch from, which holds all required
        files for running MD. 
    stages : list or str
        List of dicts, each giving values for the following keys:
            - 'server': server host to transfer to
            - 'user': username to authenticate with
            - 'staging': absolute path to staging area on remote resource
        Alternatively, a path to a yaml file giving such a list of dicts.
    md_engine : {'gromacs'}
        MD engine name; needed to determine continuation mechanism to use.
    md_category : str
        Category to use for the MD Firework. Used to target to correct rockets.
    local_category : str
        Category to use for non-MD Fireworks, which should be run by rockets
        where the ``archive`` directory is accessible.
    postrun_wf : Workflow
        Workflow to perform after each copyback; performed in parallel to continuation run.
    post_wf : Workflow
        Workflow to perform after completed MD (no continuation); use for final
        postprocessing. 
    files : list 
        Names of files (not paths) needed for each leg of the simulation. Need
        not exist, but if they do they will get staged before each run.

    Returns
    -------
    workflow 
        MD workflow; can be submitted to LaunchPad of choice.

    """
    sim = mds.Sim(sim)

    #TODO: perhaps move to its own FireTask?
    sim.categories['md_status'] = 'running'

    #TODO: the trouble with this is that if this workflow is created with the intent
    #      of being attached to another, these files may not exist at all yet
    files = files or []  # guard against the default files=None before iterating
    f_exist = [f for f in files if os.path.exists(os.path.join(archive, f))]

    if isinstance(stages, string_types):
        with open(stages, 'r') as f:
            stages = yaml.safe_load(f)

    ## Stage files on all resources where MD may run; takes place locally
    fts_stage = list()
    for stage in stages:
        fts_stage.append(
            FileTransferTask(mode='rtransfer',
                             server=stage['server'],
                             user=stage['user'],
                             files=[os.path.join(archive, i) for i in files],
                             dest=os.path.join(stage['staging'], sim.uuid),
                             max_retry=5,
                             shell_interpret=True))

    fw_stage = Firework(fts_stage,
                        spec={
                            '_launch_dir': archive,
                            '_category': local_category
                        },
                        name='staging')

    ## MD execution; takes place in queue context of compute resource

    # make rundir
    ft_mkdir = MkRunDirTask(uuid=sim.uuid)

    # copy input files to scratch space
    ft_copy = FileTransferTask(
        mode='copy',
        files=[os.path.join('${STAGING}/', sim.uuid, i) for i in files],
        dest=os.path.join('${SCRATCHDIR}/', sim.uuid),
        ignore_missing=True,
        shell_interpret=True)

    # next, run MD
    ft_md = ScriptTask(script='run_md.sh {}'.format(
        os.path.join('${SCRATCHDIR}/', sim.uuid)),
                       use_shell=True,
                       fizzle_bad_rc=True)

    # send info on where files live to pull firework
    ft_info = BeaconTask(uuid=sim.uuid)

    fw_md = Firework([ft_mkdir, ft_copy, ft_md, ft_info],
                     spec={'_category': md_category},
                     name='md')

    ## Pull files back to archive; takes place locally
    ft_copyback = FilePullTask(dest=archive)

    fw_copyback = Firework([ft_copyback],
                           spec={
                               '_launch_dir': archive,
                               '_category': local_category
                           },
                           name='pull')

    ## Decide if we need to continue and submit new workflow if so; takes place
    ## locally

    if md_engine == 'gromacs':
        ft_continue = GromacsContinueTask(sim=sim,
                                          archive=archive,
                                          stages=stages,
                                          md_engine=md_engine,
                                          md_category=md_category,
                                          local_category=local_category,
                                          postrun_wf=postrun_wf,
                                          post_wf=post_wf,
                                          files=files)
    else:
        raise ValueError("No known md engine `{}`.".format(md_engine))

    fw_continue = Firework([ft_continue],
                           spec={
                               '_launch_dir': archive,
                               '_category': local_category
                           },
                           name='continue')

    wf = Workflow([fw_stage, fw_md, fw_copyback, fw_continue],
                  links_dict={
                      fw_stage: [fw_md],
                      fw_md: [fw_copyback],
                      fw_copyback: [fw_continue]
                  },
                  name='{} | md'.format(sim.name),
                  metadata=dict(sim.categories))

    ## Mix in postrun workflow, if given
    if postrun_wf:
        if isinstance(postrun_wf, dict):
            postrun_wf = Workflow.from_dict(postrun_wf)

        wf.append_wf(Workflow.from_wflow(postrun_wf), [fw_copyback.fw_id])

    return wf
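A hypothetical call to `make_md_workflow`, followed by submission to a LaunchPad (all paths, hosts, and file names below are placeholders):

from fireworks import LaunchPad

wf = make_md_workflow(
    sim="/archive/sims/protein_A",
    archive="/archive/sims/protein_A",
    stages=[{"server": "hpc.example.org", "user": "alice",
             "staging": "/global/staging"}],
    files=["md.tpr", "md.cpt"],
)
LaunchPad.auto_load().add_wf(wf)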