def setUp(self):
    """Build a three-Firework power chain; skip when python-igraph is absent."""
    try:
        __import__('igraph', fromlist=['Graph'])
    except (ImportError, ModuleNotFoundError):
        raise unittest.SkipTest('Skipping because python-igraph not installed')

    # fw1: 2 ** 3 -> "first power"
    pow_task = PyTask(func='math.pow', inputs=['base', 'exponent'],
                      outputs=['first power'])
    self.fw1 = Firework(pow_task, name='pow(2, 3)',
                        spec={'base': 2, 'exponent': 3})

    # fw2: (first power) ** 4 -> "second power"
    chain_task = PyTask(func='math.pow', inputs=['first power', 'exponent'],
                        outputs=['second power'])
    self.fw2 = Firework(chain_task, name='pow(pow(2, 3), 4)',
                        spec={'exponent': 4})

    # fw3: print the final result
    self.fw3 = Firework(PyTask(func='print', inputs=['second power']),
                        name='the third one')
def test_parse_pass_write(self):
    """Parse a finished FF run into the db, then write a new OptSet input
    from the parsed molecule, and check species/coords round-trip."""
    calc_dir = os.path.join(module_dir, "..", "..", "test_files", "FF_working")
    parse_fw = Firework([QChemToDb(calc_dir=calc_dir,
                                   input_file="test.qin.opt_1",
                                   output_file="test.qout.opt_1",
                                   db_file=">>db_file<<")])
    write_fw = Firework([WriteInputFromIOSet(qchem_input_set="OptSet",
                                             write_to_dir=module_dir)],
                        parents=parse_fw)
    self.lp.add_wf(Workflow([parse_fw, write_fw]))
    worker = FWorker(env={"db_file": os.path.join(db_dir, "db.json")})
    rapidfire(self.lp, fworker=worker)

    test_mol = QCInput.from_file(os.path.join(module_dir, "mol.qin")).molecule
    np.testing.assert_equal(self.act_mol.species, test_mol.species)
    np.testing.assert_equal(self.act_mol.cart_coords, test_mol.cart_coords)
def test_parse_pass_rotate_write(self):
    """Parse a run, rotate a torsion by 90 degrees, write a new input, and
    compare against the reference rotated geometry."""
    calc_dir = os.path.join(module_dir, "..", "..", "test_files")
    parse_fw = Firework([QChemToDb(calc_dir=calc_dir,
                                   input_file="pt_gs_wb97mv_tz_initial.in",
                                   output_file="pt_gs_wb97mv_tz_initial_1_job.out",
                                   db_file=">>db_file<<")])
    rot_task = RotateTorsion(atom_indexes=[6, 8, 9, 10], angle=90.0)
    write_task = WriteInputFromIOSet(qchem_input_set="OptSet",
                                     write_to_dir=module_dir)
    rotate_fw = Firework([rot_task, write_task], parents=parse_fw)
    self.lp.add_wf(Workflow([parse_fw, rotate_fw]))
    worker = FWorker(env={"db_file": os.path.join(db_dir, "db.json")})
    rapidfire(self.lp, fworker=worker)

    test_mol = QCInput.from_file(os.path.join(module_dir, "mol.qin")).molecule
    act_mol = Molecule.from_file(
        os.path.join(module_dir, "..", "..", "test_files", "pt_rotated_90.0.xyz"))
    np.testing.assert_equal(act_mol.species, test_mol.species)
    np.testing.assert_allclose(act_mol.cart_coords, test_mol.cart_coords,
                               atol=0.0001)
def test_use_fake_qchem(self):
    """use_fake_qchem must replace RunQChemDirect with RunQChemFake in the
    targeted firework."""
    calc_dir = os.path.join(module_dir, "..", "test_files", "FF_working")
    run_task = RunQChemDirect(
        qchem_cmd="should not need this going to be replaced with fake run")
    parse_task = QChemToDb(calc_dir=calc_dir, input_file="test.qin.opt_1",
                           output_file="test.qout.opt_1")
    fw1 = Firework([run_task, parse_task], name="test_fake_run")
    fw2 = Firework([WriteInputFromIOSet(qchem_input_set="OptSet",
                                        write_to_dir=module_dir)],
                   parents=fw1, name="test_write")
    wf = Workflow([fw1, fw2])
    ref_dirs = {"test_fake_run": os.path.join(calc_dir, "test.qout.opt_1")}
    fake_run_wf = use_fake_qchem(wf, ref_dirs)
    # True iff any task of the "test_fake_run" firework was swapped for a fake
    swapped = any("RunQChemFake" in str(t)
                  for fw in fake_run_wf.fws if fw.name == "test_fake_run"
                  for t in fw.tasks)
    self.assertTrue(swapped)
def execute(self, name):
    """Run self.commands as a linear FireWorks chain named *name*,
    recording READY/RUNNING/FINISHED (or ERROR) in self.state."""
    try:
        self.state.saveState('READY')
        lp = LaunchPad(**self.db)
        lp.reset('', require_password=False)
        fireworks = []
        for idx, command in enumerate(self.commands):
            # each task depends on the previous one; the first has no parent
            parents = [fireworks[idx - 1]] if idx > 0 else None
            fireworks.append(Firework(ScriptTask.from_str(command),
                                      name=f'task_{idx}',
                                      fw_id=idx,
                                      parents=parents))
        self.state.saveState('RUNNING')
        lp.add_wf(Workflow(fireworks, name=name))
        rapidfire(lp)
        self.state.saveState('FINISHED')
    except Exception as e:
        # best-effort: record the failure state rather than propagate
        print(e)
        self.state.saveState('ERROR')
def freq_fw(self, charge, spin_multiplicity, fw_id_cal, fw_id_db,
            priority=None, method=None):
    """Build the vibrational-frequency calculation Firework and its paired
    DB-insertion Firework.

    Args:
        charge: molecular charge passed to QcTask.
        spin_multiplicity: spin multiplicity passed to QcTask.
        fw_id_cal: fw_id assigned to the calculation firework.
        fw_id_db: fw_id assigned to the DB-insertion firework.
        priority: if truthy, written to spec['_priority'] on both fireworks.
        method: "exchange-corr/basis" string; defaulted by self.large below.

    Returns:
        (fw_freq_cal, fw_freq_db) tuple of Firework objects.
    """
    if not method:
        if self.large:
            method = "PBE-D3/6-31+G*"
        else:
            # NOTE(review): "B3lYP" casing looks like a typo for "B3LYP" —
            # confirm downstream parsing is case-insensitive before changing.
            method = "B3lYP/6-31+G*"
    task_type = "vibrational frequency"
    state_name = self.get_state_name(charge, spin_multiplicity)
    title = self.molname + " " + state_name + " " + method + " " + task_type
    # Decompose the method string into QChem input components.
    exchange, correlation, basis_set, aux_basis, rem_params, method_token, ecp = self. \
        get_exchange_correlation_basis_auxbasis_remparams(method, self.mol)
    # XYGJ-OS needs analytic first derivatives enabled explicitly.
    if exchange.lower() in ["xygjos"]:
        rem_params["IDERIV"] = 1
    qctask = QcTask(self.mol, charge=charge,
                    spin_multiplicity=spin_multiplicity, jobtype="freq",
                    title=title, exchange=exchange, correlation=correlation,
                    basis_set=basis_set, aux_basis_set=aux_basis, ecp=ecp,
                    rem_params=rem_params)
    if self.large:
        # large systems get a higher SCF iteration cap
        qctask.set_scf_algorithm_and_iterations(iterations=100)
    qcinp = QcInput([qctask])
    # Populate the calculation spec on top of the shared base spec.
    spec = self.base_spec()
    spec["qcinp"] = qcinp.as_dict()
    spec['task_type'] = task_type
    spec['charge'] = charge
    spec['spin_multiplicity'] = spin_multiplicity
    spec['run_tags']['methods'] = method_token
    spec["qm_method"] = method
    if priority:
        spec['_priority'] = priority
    task_name = self.molname + ' ' + state_name + ' ' + task_type
    # Imported here to avoid a circular import at module load time —
    # presumably; verify before hoisting to the top of the file.
    from rubicon.firetasks.qchem.multistep_qchem_task \
        import QChemFrequencyDBInsertionTask
    fw_freq_cal = Firework([QChemTask()], spec=spec, name=task_name,
                           fw_id=fw_id_cal)
    # The DB-insertion firework reuses the spec, but must run even when the
    # calculation fizzles, must not be deduplicated, and drops two trackers.
    spec_db = copy.deepcopy(spec)
    del spec_db['_dupefinder']
    spec_db['_allow_fizzled_parents'] = True
    spec_db['task_type'] = task_type + ' DB Insertion'
    del spec_db["_trackers"][:2]
    task_name_db = task_name + " DB Insertion"
    fw_freq_db = Firework([QChemFrequencyDBInsertionTask()], spec=spec_db,
                          name=task_name_db, fw_id=fw_id_db)
    return fw_freq_cal, fw_freq_db
def test_get_lp_and_fw_id_from_task(self):
    """
    Tests the get_lp_and_fw_id_from_task.
    This test relies on the fact that the LaunchPad loaded from auto_load
    will be different from what is defined in TESTDB_NAME. If this is not
    the case the test will be skipped.
    """
    lp = LaunchPad.auto_load()

    if not lp or lp.db.name == TESTDB_NAME:
        # Fix: the original implicit string concatenation produced
        # "...differentfrom..." (missing space between the two literals).
        raise unittest.SkipTest(
            "LaunchPad lp {} is not suitable for this test. Should be "
            "available and different from {}".format(lp, TESTDB_NAME))

    task = LpTask()
    # fw1 requests the launchpad via its spec, so the task should succeed
    fw1 = Firework([task], spec={'_add_launchpad_and_fw_id': True}, fw_id=1)
    # fw2 does not, so the task should fizzle
    fw2 = Firework([task], spec={}, fw_id=2, parents=[fw1])
    wf = Workflow([fw1, fw2])
    self.lp.add_wf(wf)

    rapidfire(self.lp, self.fworker, m_dir=MODULE_DIR, nlaunches=1)
    fw = self.lp.get_fw_by_id(1)
    assert fw.state == "COMPLETED"

    rapidfire(self.lp, self.fworker, m_dir=MODULE_DIR, nlaunches=1)
    fw = self.lp.get_fw_by_id(2)
    assert fw.state == "FIZZLED"
def test_get_time_report_for_wf(self):
    """Two 0.5s sleep fireworks should yield a report with 2 fws and a
    total run time exceeding 1 second."""
    sleep_task = PyTask(func="time.sleep", args=[0.5])
    fireworks = [
        Firework([sleep_task],
                 spec={'wf_task_index': "test{}_1".format(i), "nproc": 16},
                 fw_id=i)
        for i in (1, 2)
    ]
    self.lp.add_wf(Workflow(fireworks))
    rapidfire(self.lp, self.fworker, m_dir=MODULE_DIR)

    wf = self.lp.get_wf_by_fw_id(1)
    assert wf.state == "COMPLETED"
    report = get_time_report_for_wf(wf)
    assert report.n_fws == 2
    assert report.total_run_time > 1
def setUp(self):
    """Skip unless python-igraph is installed, then assemble the
    pow(pow(2, 3), 4) three-Firework test chain."""
    try:
        __import__("igraph", fromlist=["Graph"])
    except (ImportError, ModuleNotFoundError):
        raise unittest.SkipTest(
            "Skipping because python-igraph not installed")

    self.fw1 = Firework(
        PyTask(func="math.pow",
               inputs=["base", "exponent"],
               outputs=["first power"]),
        name="pow(2, 3)",
        spec={"base": 2, "exponent": 3})
    self.fw2 = Firework(
        PyTask(func="math.pow",
               inputs=["first power", "exponent"],
               outputs=["second power"]),
        name="pow(pow(2, 3), 4)",
        spec={"exponent": 4})
    self.fw3 = Firework(
        PyTask(func="print", inputs=["second power"]),
        name="the third one")
def test_early_exit(self):
    """Run a diamond workflow with 2 parallel workers; the two middle
    fireworks must have been launched by distinct processes."""
    os.chdir(MODULE_DIR)
    script_text = "echo hello from process $PPID; sleep 2"

    def make_fw(fw_id):
        # each firework writes its parent PID to task.out in its launch dir
        task = ScriptTask.from_str(shell_cmd=script_text,
                                   parameters={"stdout_file": "task.out"})
        return Firework(task, fw_id=fw_id)

    fws = [make_fw(i) for i in range(1, 5)]
    wf = Workflow(fws, {1: [2, 3], 2: [4], 3: [4]})
    self.lp.add_wf(wf)
    launch_multiprocess(self.lp, FWorker(), 'DEBUG', 0, 2, sleep_time=0.5)

    def read_task_out(fw_id):
        fw = self.lp.get_fw_by_id(fw_id)
        with open(os.path.join(fw.launches[0].launch_dir, "task.out")) as f:
            return f.read()

    self.assertNotEqual(read_task_out(2), read_task_out(3))
def main(sequencing_directory, library_prefix, num_libraries, raw_data_dir):
    """Build and submit an align/count workflow per library directory.

    Fix: the original called seq_functions.make_directories() twice per
    library (once in a standalone loop and again inside the build loop);
    the directories are now created exactly once.
    """
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted files; prefer yaml.safe_load if the PyYAML version allows.
    lpad = LaunchPad(**yaml.load(open("my_launchpad.yaml")))
    workflow_fireworks = []
    workflow_dependencies = collections.defaultdict(list)

    library_dirs = [
        os.path.join(sequencing_directory, library_prefix + str(i + 1))
        for i in xrange(num_libraries)
    ]
    subdirs = ["aligned_star", "quant_rsem", "counted_rsem"]

    for library_dir in library_dirs:
        seq_functions.make_directories(library_dir, subdirs)

        # STAR alignment step (8 tasks/node, 24h walltime)
        name = "AlignSTAR_%s" % os.path.basename(library_dir)
        fw_align = Firework(
            [
                Align_star_Task(library_path=library_dir,
                                trimmed_name="trimmed",
                                aligned_name="aligned_star/",
                                quant_name="quant_rsem/")
            ],
            name=name,
            spec={
                "_queueadapter": {
                    "job_name": name,
                    "ntasks_per_node": 8,
                    "walltime": '24:00:00'
                }
            },
        )
        workflow_fireworks.append(fw_align)

        # RSEM counting step, depends on the alignment firework
        name = "Count_%s" % os.path.basename(library_dir)
        fw_count = Firework(
            [
                Count_rsem_Task(library_path=library_dir,
                                aligned_name="aligned_star",
                                quant_name="quant_rsem",
                                counted_name="counted_rsem",
                                spikeids=[
                                    'AM1780SpikeIn1', 'AM1780SpikeIn4',
                                    'AM1780SpikeIn7'
                                ])
            ],
            name=name,
            spec={"_queueadapter": {
                "job_name": name
            }},
        )
        workflow_fireworks.append(fw_count)
        workflow_dependencies[fw_align].append(fw_count)

    lpad.add_wf(Workflow(workflow_fireworks, links_dict=workflow_dependencies))
def test_parentconnector(self):
    """Links declared via the parents= kwarg must appear in Workflow.links,
    and a workflow missing a declared parent must be rejected."""
    fw1 = Firework(ScriptTask.from_str('echo "1"'))
    fw2 = Firework(ScriptTask.from_str('echo "1"'), parents=fw1)
    fw3 = Firework(ScriptTask.from_str('echo "1"'), parents=[fw1, fw2])

    expected_links = {
        fw1.fw_id: [fw2.fw_id, fw3.fw_id],
        fw2.fw_id: [fw3.fw_id],
        fw3.fw_id: [],
    }
    self.assertEqual(Workflow([fw1, fw2, fw3]).links, expected_links)

    # fw3 lists fw2 as a parent, but fw2 is absent -> invalid workflow
    self.assertRaises(ValueError, Workflow, [fw1, fw3])
def main(algorithms, supports, datasets, working_directory, reset, launchpad_args):
    """Submit one workflow per (dataset, algorithm): a setup firework fanned
    out to one SPMF firework per support value."""
    launchpad = LaunchPad(**launchpad_args)
    if reset:
        launchpad.reset("", require_password=False)

    working_directory = os.path.expandvars(
        os.path.expanduser(working_directory))
    home = os.path.expanduser('~')

    for dataset in datasets:
        dataset_name = os.path.basename(dataset)
        for algorithm in algorithms:
            fireworks = []
            links = {}

            setup_spec = {
                "directory": os.path.join(working_directory, algorithm),
                "local_files": [os.path.abspath(dataset)],
                "_priority": 1000,
            }
            # Abbreviate the home prefix for a readable firework name.
            try:
                short_working_directory = setup_spec["directory"].replace(
                    home, '~', 1)
            except ValueError:
                short_working_directory = setup_spec["directory"]
            setup_firework = Firework(
                SetupWorkingDirectory(), spec=setup_spec,
                name="setup {0}".format(short_working_directory))
            fireworks.append(setup_firework)

            for support in supports:
                padded = str(support).zfill(3)
                spec = {
                    "database_filename": dataset_name,
                    "algorithm": algorithm,
                    "support": support,
                    "timings_filename": "{0}.{1}.{2}.timing".format(
                        dataset_name, algorithm, padded),
                    "patterns_filename": "{0}.{1}.{2}.patterns.gz".format(
                        dataset_name, algorithm, padded),
                    # lower supports run later (priority == support value)
                    "_priority": support,
                }
                support_firework = Firework(
                    SPMFTask(), spec=spec,
                    name="{0} {1}".format(algorithm, support))
                fireworks.append(support_firework)
                links.setdefault(setup_firework, []).append(support_firework)

            launchpad.add_wf(Workflow(
                fireworks, links,
                name="{0} {1}".format(dataset_name, algorithm)))
def run_task(self, fw_spec):
    """One coordinate-ascent step: pick the best of {orig, inc, dec} for the
    current parameter, advance to the next parameter, and spawn the next
    iteration (plus a validation pass after a full sweep).

    Python 2 code (print statements). Reads and mutates fw_spec in place;
    returns an FWAction whose `additions` carry the follow-up workflow.
    """
    objective_with_inc = fw_spec["%s_eval_metrics_with_inc" % EVAL_SCRIPT][OBJECTIVE_METRIC]
    objective_with_dec = fw_spec["%s_eval_metrics_with_dec" % EVAL_SCRIPT][OBJECTIVE_METRIC]
    orig_objective = fw_spec["%s_eval_metrics" % EVAL_SCRIPT][OBJECTIVE_METRIC]
    # Ties favor keeping the original value, then the decrease direction.
    if orig_objective>=objective_with_inc and orig_objective>=objective_with_dec:
        fw_spec['coord_ascent_params'] = fw_spec['orig_param_val']#update parameter
        mod_alpha(fw_spec, 'dec')
        best_obj = fw_spec["%s_eval_metrics" % EVAL_SCRIPT][OBJECTIVE_METRIC]
        change_for_best_obj = 'const'
    elif objective_with_dec>=objective_with_inc and objective_with_dec>orig_objective:
        fw_spec['coord_ascent_params'] = fw_spec['dec_param_val']#update parameter
        mod_alpha(fw_spec, 'inc')
        fw_spec["%s_eval_metrics" % EVAL_SCRIPT] = fw_spec["%s_eval_metrics_with_dec" % EVAL_SCRIPT]#update baseline metrics for next iteration
        best_obj = fw_spec["%s_eval_metrics_with_dec" % EVAL_SCRIPT][OBJECTIVE_METRIC]
        change_for_best_obj = 'dec'
    elif objective_with_inc>objective_with_dec and objective_with_inc>orig_objective:
        fw_spec['coord_ascent_params'] = fw_spec['inc_param_val']#update parameter
        mod_alpha(fw_spec, 'inc')
        fw_spec["%s_eval_metrics" % EVAL_SCRIPT] = fw_spec["%s_eval_metrics_with_inc" % EVAL_SCRIPT]#update baseline metrics for next iteration
        best_obj = fw_spec["%s_eval_metrics_with_inc" % EVAL_SCRIPT][OBJECTIVE_METRIC]
        change_for_best_obj = 'inc'
    else:
        # The three branches above are exhaustive; reaching here is a bug.
        print "Coding Error ChooseNextIter()"
        print (objective_with_inc, objective_with_dec, orig_objective)
        sys.exit(1);
    # Advance to the next parameter; wraps to 0 after the last one.
    fw_spec['param_idx'] = inc_parameter_idx(fw_spec['param_idx'], fw_spec)
    if fw_spec['param_idx'] == 0:
        # A full sweep finished: evaluate on the validation sequences and
        # store results before starting the next iteration.
        val_spec = copy.deepcopy(fw_spec)
        val_spec['TRAINING_SEQUENCES'] = VALIDATION_SEQUENCES
        val_spec['seq_idx_to_eval'] = VALIDATION_SEQUENCES
        val_spec['validation_eval'] = True
        val_batch = Firework(RunRBPF_Batch(), spec = val_spec)
        val_eval = Firework(RunEval(), spec = val_spec)
        storeResultsFW = Firework(StoreResultsInDatabase(), spec=val_spec)
        next_iter_firework = Firework(Iterate(), fw_spec)
        # next_iter_firework is intentionally unlinked: it runs independently
        # of the validation chain — presumably by design; verify.
        workflow = Workflow([val_batch, val_eval, storeResultsFW, next_iter_firework],
                            {val_batch: [val_eval], val_eval: [storeResultsFW]})
    else:
        next_iter_firework = Firework(Iterate(), fw_spec)
        workflow = Workflow([next_iter_firework])
    return FWAction(stored_data = {'best_obj': best_obj,
                                   'change_for_best_obj': change_for_best_obj,
                                   'parameter_changed_val': fw_spec['coord_ascent_params']},
                    additions = workflow)
def test_add_priority(self):
    """add_priority(wf, root, child) sets root fireworks to the first value
    and all their children to the second."""
    root = Firework([ScriptTask(script=None)], fw_id=-1)
    child_a = Firework([ScriptTask(script=None)], parents=[root], fw_id=-2)
    child_b = Firework([ScriptTask(script=None)], parents=[root], fw_id=-3)
    wf = add_priority(Workflow([root, child_a, child_b]), 4, 8)

    for fw_id, expected in ((-1, 4), (-2, 8), (-3, 8)):
        self.assertEqual(wf.id_fw[fw_id].spec["_priority"], expected)
def test_fwconnector(self):
    """Workflow links may be given as bare fw_ids, as Firework objects, or
    omitted entirely."""
    fw1 = Firework(ScriptTask.from_str('echo "1"'))
    fw2 = Firework(ScriptTask.from_str('echo "1"'))
    linked = {fw1.fw_id: [fw2.fw_id], fw2.fw_id: []}

    # link given as a bare id
    self.assertEqual(Workflow([fw1, fw2], {fw1.fw_id: fw2.fw_id}).links, linked)
    # link given as Firework objects
    self.assertEqual(Workflow([fw1, fw2], {fw1: fw2}).links, linked)
    # no links at all
    self.assertEqual(Workflow([fw1, fw2]).links,
                     {fw1.fw_id: [], fw2.fw_id: []})
def submitRelax(self):
    """Build a Relax -> GetXCcontribs -> SaveResults workflow spread across
    three allocated clusters and return it for submission.

    Python 2 code (print statement). Each firework is pinned to its
    cluster's fworker, queue adapter, and launch directory; inp.gpw is
    passed from the relax step to the XC-contributions step via
    _files_out/_files_in.
    """
    # Local imports — presumably to avoid import cost/cycles at module
    # load; verify before hoisting.
    from fireworks import Firework, Workflow
    from standardScripts import Relax, GetXCcontribs, SaveResults
    clusters = self.allocate(3)
    print 'submitting to ', clusters
    timestamp = '_' + datetime.now().strftime('%Y_%m_%d_%H_%M')
    names = [
        x + '_%d' % self.jobid
        for x in ['Relax', 'GetXCcontribs', 'SaveResults']
    ]
    # Step 1: relaxation; exports the converged wavefunction (inp.gpw).
    fw1 = Firework(
        [Relax()],
        name=names[0],
        spec={
            "jobID": self.jobid,
            '_fworker': clusters[0].fworker,
            "_pass_job_info": True,
            "_files_out": {
                "fw1": "inp.gpw"
            },
            "_queueadapter": clusters[0].qfunc(self.guessTime('Relax')),
            "_launch_dir": clusters[0].launchdir + names[0] + timestamp
        })
    # Step 2: XC contributions; consumes inp.gpw from step 1.
    fw2 = Firework(
        [GetXCcontribs()],
        name=names[1],
        parents=[fw1],
        spec={
            "jobID": self.jobid,
            '_fworker': clusters[1].fworker,
            "_pass_job_info": True,
            "_files_in": {
                "fw1": "inp.gpw"
            },
            "_queueadapter":
            clusters[1].qfunc(self.guessTime('GetXCcontribs')),
            "_launch_dir": clusters[1].launchdir + names[1] + timestamp
        })
    # Step 3: persist results; clusters[2] MUST be sherlock (see comment
    # in original spec).
    fw3 = Firework(
        [SaveResults()],
        name=names[2],
        parents=[fw1, fw2],
        spec={
            "jobID": self.jobid,
            '_fworker': clusters[2].fworker  #MUST be sherlock
            ,
            "_queueadapter": clusters[2].qfunc(self.guessTime('SaveResults')),
            "_launch_dir": clusters[2].launchdir + names[2] + timestamp
        })
    return Workflow([fw1, fw2, fw3], name='BulkRelaxation_%d' % self.jobid)
def main(sequencing_directory, library_prefix, num_libraries, raw_data_dir):
    """Submit a sort -> count workflow for every library directory."""
    lpad = LaunchPad(**yaml.load(open("my_launchpad.yaml")))
    workflow_fireworks = []
    workflow_dependencies = collections.defaultdict(list)

    library_dirs = [
        os.path.join(sequencing_directory, library_prefix + str(i + 1))
        for i in xrange(num_libraries)
    ]
    subdirs = [
        'unzipped', 'trimmed', 'aligned', 'bammed', 'sorted', 'counted',
        'pythonized'
    ]

    for library_dir in library_dirs:
        seq_functions.make_directories(library_dir, subdirs)
        basename = os.path.basename(library_dir)

        # sorting step
        sort_name = "Sort_%s" % basename
        fw_sort = Firework(
            [SortTask(library_path=library_dir, aligned_name="aligned",
                      bammed_name="bammed", sorted_name="sorted")],
            name=sort_name,
            spec={"_queueadapter": {"job_name": sort_name}})
        workflow_fireworks.append(fw_sort)

        # counting step, dependent on the sort
        count_name = "Count_%s" % basename
        fw_count = Firework(
            [CountTask(library_path=library_dir, aligned_name="aligned",
                       bammed_name="bammed", counted_name="counted")],
            name=count_name,
            spec={"_queueadapter": {"job_name": count_name}})
        workflow_fireworks.append(fw_count)
        workflow_dependencies[fw_sort].append(fw_count)

    lpad.add_wf(Workflow(workflow_fireworks, links_dict=workflow_dependencies))
def combine_fws_parallely(swarmpad, fw_ids):
    """Merge several single-ScriptTask fireworks into one parallel firework.

    Args:
        swarmpad (SwarmPad): pad used to look up the source fireworks.
        fw_ids (list): ids of the fireworks to combine, in traversal order.

    Returns:
        Firework: a new firework wrapping a ParallelTask that runs all the
        collected ScriptTasks concurrently.

    Raises:
        ValueError: if any firework's first task is not a ScriptTask.
    """
    firetasks_to_combine = []
    for fw_id in fw_ids:
        # only the first task of each firework is considered
        task = swarmpad.get_fw_by_id(fw_id).spec['_tasks'][0]
        if not isinstance(task, ScriptTask):
            raise ValueError('Spec of Firework with id {} does not contain an object of type ScriptTask '.format(fw_id))
        firetasks_to_combine.append(task)

    combined_firework = Firework(ParallelTask.from_firetasks(firetasks_to_combine))
    swarmpad.m_logger.info('Parallely Clustered {} to firework_id {}'.format(
        fw_ids, combined_firework.fw_id))
    return combined_firework
def create_firework(cls, filename, spec):
    """Build a Firework that runs the given shell script with `sh`.

    Fix: the script path is now assembled with os.path.join instead of
    manual "/" concatenation, which is portable and handles separators
    correctly.

    Args:
        filename (str): script file name, resolved against the current
            working directory.
        spec (dict): spec to attach to the firework.

    Returns:
        Firework: firework wrapping a single `sh <path>` ScriptTask.
    """
    task_path = os.path.join(os.getcwd(), filename)
    task = ScriptTask.from_str("sh " + task_path)
    return Firework(task, spec=spec)
def test_add_wf(self):
    """Adding a bare Firework then a two-FW Workflow should leave three
    fireworks in the launchpad."""
    hello_fw = Firework(ScriptTask.from_str('echo "hello"'), name="hello")
    self.lp.add_wf(hello_fw)

    # a bare Firework becomes a single-FW workflow
    self.assertEqual(len(self.lp.get_wf_ids()), 1)
    for fw_id in self.lp.get_wf_ids():
        wf = self.lp.get_wf_by_fw_id_lzyfw(fw_id)
        self.assertEqual(len(wf.id_fw.keys()), 1)

    goodbye_fw = Firework(ScriptTask.from_str('echo "goodbye"'),
                          name="goodbye")
    self.lp.add_wf(Workflow([hello_fw, goodbye_fw], name='test_workflow'))

    # 1 (first wf) + 2 (second wf) fireworks in total
    self.assertEqual(len(self.lp.get_fw_ids()), 3)
    self.lp.reset('', require_password=False)
def wf_creator(x):
    """
    The workflow creator function required by rocketsled.

    Maps an input vector x to a one-FireWork workflow that computes the
    output y. Requirements for use with rocketsled:

    1. OptTask is passed into a FireWork in the workflow.
    2. The fields "_x" and "_y" are written to the spec of the FireWork
       containing OptTask ("_y" is written by ObjectiveFuncTask itself).
    3. wf_creator is passed as the first argument to MissionControl's
       "configure" method.

    Args:
        x (list): The wf_creator input vector; here, 3 integers in [1, 5].

    Returns:
        (Workflow): One FireWork (two FireTasks) wired for the
        optimization loop.
    """
    tasks = [ObjectiveFuncTask(), OptTask(**db_info)]
    return Workflow([Firework(tasks, spec={'_x': x})])
def wf_single_fit(fworker, fit_name, pipe_config, name, data_pickle, target,
                  *args, tags=None, **kwargs):
    """
    Submit a dataset to be fit for a single pipeline (i.e., to train on a
    dataset for real predictions).
    """
    check_pipe_config(pipe_config)
    warnings.warn("Single fitted MatPipe not being stored in automatminer db "
                  "collections. Please consult fw_spec to find the benchmark "
                  "on {}".format(fworker))
    if fworker not in valid_fworkers:
        raise ValueError("fworker must be in {}".format(valid_fworkers))

    spec = {
        "pipe_config": pipe_config,
        "base_save_dir": get_time_str() + "_single_fit",
        "data_pickle": data_pickle,
        "target": target,
        "automatminer_commit": get_last_commit(),
        "tags": tags if tags else [],
        "_fworker": fworker,
    }
    fit_fw = Firework(RunSingleFit(), spec=spec,
                      name="{} single fit".format(name))
    return Workflow(
        [fit_fw], metadata={"tags": tags},
        name="single fit: {} ({}) [{}]".format(name, fit_name, fworker))
def run_task(self, fw_spec):
    """Fan out one RunRBPF firework per (run_idx, training sequence) pair.

    Fixes: the original initialized `rbpf_batch = []` twice (the first
    assignment was dead); the dead commented-out Q-modification block is
    removed.

    Side effects: rewrites fw_spec['results_folder'] to an iteration- (and,
    when past iteration 0, direction-) specific subfolder and creates it.

    Returns:
        FWAction: detours to the batch workflow and propagates the updated
        results_folder into downstream specs.
    """
    # Name the results subfolder after the iteration (and mod direction
    # once coordinate ascent has started).
    if fw_spec['coord_ascent_iter'] > 0:
        assert('mod_direction' in fw_spec)
        fw_spec['results_folder'] = "%s/iterID_%d_dir-%s" % (
            fw_spec['results_folder'], fw_spec['coord_ascent_iter'],
            fw_spec['mod_direction'])
    else:
        fw_spec['results_folder'] = "%s/iterID_%d" % (
            fw_spec['results_folder'], fw_spec['coord_ascent_iter'])
    setup_results_folder(fw_spec['results_folder'])

    rbpf_batch = []
    for run_idx in range(1, fw_spec['NUM_RUNS'] + 1):
        for seq_idx in fw_spec['TRAINING_SEQUENCES']:
            # each firework gets its own spec copy with its run/seq ids
            cur_spec = copy.deepcopy(fw_spec)
            cur_spec['run_idx'] = run_idx
            cur_spec['seq_idx'] = seq_idx
            rbpf_batch.append(Firework(RunRBPF(), spec=cur_spec))

    parallel_workflow = Workflow(rbpf_batch)
    return FWAction(
        detours=parallel_workflow,
        mod_spec=[{'_set': {"results_folder": fw_spec['results_folder']}}])
def get_mae(target_path):
    """
    Creates Firework from MLTask. It predicts the property of all uncomputed
    structures in the workflow. It is trained on all converged structures.
    Crossvalidation is used to infer the optimal machine learning
    hyperparameters. Currently, only KRR (kernel ridge regression) is
    implemented. A new document is added to the machine_learning collection.

    Args:
        target_path (str) : absolute path to the target directory
                            (needs to exist) on the computing resource.

    Returns:
        Firework : MLWork firework wrapping a single MLTask.
        (Fix: the original docstring claimed an FWAction was returned,
        but the function constructs and returns a Firework.)
    """
    firetask1 = MLTask(target_path=target_path)
    fw = Firework([firetask1],
                  spec={
                      '_category': "medium",
                      'name': 'MLTask'
                  },
                  name='MLWork')
    return fw
def initialize_workflow_data(username, password, parameters, name="UNNAMED",
                             workflow_type="UNNAMED", extdb_connect=None):
    """
    Creates a custom Firework object to initialize the workflow. It updates
    the workflow collection and makes a few entries in the fw_spec.

    Args:
        username (str) : username for the mongodb database
        password (str) : password for the mongodb database
        parameters (dict) : workflow-specific input parameters
        name (str) : custom name of the workflow
        workflow_type (str) : custom workflow type
        extdb_connect (dict): dictionary optionally containing the keys
                              host, authsource and db_name. All fields have
                              a default value. Defaults to an empty dict.

    Returns:
        Firework object : InitialWork
    """
    # Fix: the original used a mutable default argument (extdb_connect={}),
    # which is shared across calls; use a None sentinel instead.
    if extdb_connect is None:
        extdb_connect = {}
    firetask1 = InitialTask(username=username,
                            password=password,
                            parameters=parameters,
                            name=name,
                            workflow_type=workflow_type,
                            extdb_connect=extdb_connect)
    fw = Firework([firetask1],
                  spec={'_category': "lightweight", 'name': 'InitialTask'},
                  name='InitialWork')
    return fw
def test_run(self):
    """Insert a document via MongoEngineDBInsertionTask and read it back."""
    db = DatabaseData(self.lp.name,
                      collection="test_MongoEngineDBInsertionTask",
                      username=self.lp.username,
                      password=self.lp.password)
    insertion_fw = Firework([MongoEngineDBInsertionTask(db)], fw_id=1,
                            spec={"_add_launchpad_and_fw_id": True})
    self.lp.add_wf(Workflow([insertion_fw],
                            metadata={
                                'workflow_class': SaveDataWorkflow.workflow_class,
                                'workflow_module': SaveDataWorkflow.workflow_module
                            }))
    rapidfire(self.lp, self.fworker, m_dir=MODULE_DIR, nlaunches=1)

    assert self.lp.get_wf_by_fw_id(1).state == "COMPLETED"

    # retrieve the saved object
    # error if not imported locally
    from abiflows.fireworks.tasks.tests.mock_objects import DataDocument
    db.connect_mongoengine()
    with db.switch_collection(DataDocument) as DataDocument:
        docs = DataDocument.objects()
        assert len(docs) == 1
        assert docs[0].test_field_string == "test_text"
        assert docs[0].test_field_int == 5
def wf_single_fit(fworker, fit_name, pipe_config, name, df, target, tags=None):
    """
    Submit a dataset to be fit for a single pipeline (i.e., to train on a
    dataset for real predictions).

    TODO: this is reportedly not working; note data_file is always None.
    """
    warnings.warn("Single fitted MatPipe not being stored in automatminer db "
                  "collections. Please consult fw_spec to find the benchmark "
                  "on {}".format(fworker))
    if fworker not in VALID_FWORKERS:
        raise ValueError("fworker must be in {}".format(VALID_FWORKERS))

    spec = {
        "pipe_config": pipe_config,
        "base_save_dir": get_time_str() + "_single_fit",
        "data_file": None,
        "target": target,
        "automatminer_commit": get_last_commit(),
        "tags": tags if tags else [],
        "_fworker": fworker,
    }
    fit_fw = Firework(RunSingleFit(), spec=spec,
                      name="{} single fit".format(name))
    return Workflow(
        [fit_fw], metadata={"tags": tags},
        name="single fit: {} ({}) [{}]".format(name, fit_name, fworker))
def wf_creator_basic(x):
    """Testing a basic workflow with one Firework, and two FireTasks."""
    tasks = [BasicTestTask(), OptTask(**db_info)]
    return Workflow([Firework(tasks, spec={'_x': x})])
def test_postproc_exception(self):
    """A task whose post-processing raises must leave the firework FIZZLED."""
    self.lp.add_wf(Firework(MalformedAdditionTask()))
    launch_rocket(self.lp, self.fworker)
    self.assertEqual(self.lp.get_fw_by_id(1).state, "FIZZLED")