def from_chemsys(cls, chemsys):
    """
    Class factory method for constructing campaign from chemsys.

    Writes ``start.json`` and ``candidate_data.pickle`` into the current
    directory and syncs the directory to S3 before building the campaign.

    Args:
        chemsys (str): chemical system for the campaign, given as
            dash-separated element symbols

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from the
            chemical system
    """
    # One prefix for both the initial sync and the campaign itself, so the
    # start marker and the campaign output land in the same S3 location.
    s3_prefix = "proto-dft-2/runs/{}".format(chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    element_list = chemsys.split('-')
    max_coeff, charge_balanced = heuristic_setup(element_list)
    domain = StructureDomain.from_bounds(
        element_list, charge_balanced=charge_balanced,
        n_max_atoms=20, grid=range(1, max_coeff))
    candidate_data = domain.candidates()

    # Dump structure/candidate data
    with open('candidate_data.pickle', 'wb') as f:
        pickle.dump(candidate_data, f)

    # Set up agents and loop parameters
    agent = AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=10,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = StabilityAnalyzer(hull_distance=0.2)
    experiment = OqmdDFTonMC1(timeout=30000)
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Construct and start loop
    return cls(
        candidate_data=candidate_data,
        agent=agent,
        experiment=experiment,
        analyzer=analyzer,
        seed_data=seed_data,
        heuristic_stopper=5,
        s3_prefix=s3_prefix)
def test_heuristic_setup(self):
    """
    Check heuristic_setup output, and the formula domain built from it,
    for a series of chemical systems with and without oxygen.
    """
    # Systems without oxygen: the grid maximum is pinned and charge
    # balancing is expected to be off.
    # (elements, expected g_max, expected formula count, expected formula)
    unbalanced_cases = [
        (['Al', 'Fe'], 5, 11, 'Al3Fe4'),
        (['Al', 'Fe', 'Ti'], 5, 55, 'Al3Fe2Ti2'),
        (['Al', 'Fe', 'Ti', 'Mn'], 4, 79, 'Al2Fe2Ti2Mn3'),
    ]
    for elements, expected_g_max, n_formulas, member in unbalanced_cases:
        grid_max, balanced = heuristic_setup(elements)
        self.assertEqual(grid_max, expected_g_max)
        self.assertFalse(balanced)
        domain = StructureDomain.from_bounds(
            elements, charge_balanced=balanced, grid=range(1, grid_max))
        self.assertEqual(len(domain.formulas), n_formulas)
        self.assertIn(member, domain.formulas)

    # Systems containing oxygen: only the charge-balance flag is checked
    # on the heuristic output, not the grid maximum.
    # (elements, expected formula count, expected formula)
    balanced_cases = [
        (['Al', 'O'], 1, 'Al2O3'),
        (['Fe', 'O'], 7, 'Fe3O4'),
        (['Al', 'Fe', 'O'], 15, 'Al2Fe2O5'),
        (['Al', 'Fe', 'Ti', 'O'], 27, 'Al2Fe1Ti3O7'),
    ]
    for elements, n_formulas, member in balanced_cases:
        grid_max, balanced = heuristic_setup(elements)
        self.assertTrue(balanced)
        domain = StructureDomain.from_bounds(
            elements, charge_balanced=balanced, grid=range(1, grid_max))
        self.assertEqual(len(domain.formulas), n_formulas)
        self.assertIn(member, domain.formulas)
def run_proto_dft_campaign(chemsys, s3_prefix="proto-dft-2"):
    """
    Run a proto-dft campaign for a chemical system in the current directory.

    The working directory is synced to S3 before setup and again after the
    run. Any exception raised during setup or the loop is written to
    ``error.json`` / ``job_status.json`` instead of being propagated.

    Args:
        chemsys (str): chemical system for the campaign
        s3_prefix (str): s3 prefix to sync to

    Returns:
        (bool): True if run exits
    """
    run_prefix = "{}/runs/{}".format(s3_prefix, chemsys)

    # Initialize s3
    start_record = {"started": datetime.now().isoformat(),
                    "version": __version__}
    dumpfn(start_record, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=run_prefix, sync_path='.')

    try:
        # Get structure domain
        elements = chemsys.split('-')
        grid_max, balanced = heuristic_setup(elements)
        domain = StructureDomain.from_bounds(
            elements, charge_balanced=balanced, n_max_atoms=20,
            grid=range(1, grid_max))
        candidate_data = domain.candidates()
        structure_dict = domain.hypo_structures_dict

        # Dump structure/candidate data
        pickle_targets = (
            ('candidate_data.pickle', candidate_data),
            ('structure_dict.pickle', structure_dict),
        )
        for filename, payload in pickle_targets:
            with open(filename, 'wb') as f:
                pickle.dump(payload, f)

        # Set up agents and loop parameters
        agent = AgentStabilityAdaBoost
        agent_params = dict(
            ml_algorithm=MLPRegressor,
            ml_algorithm_params={'hidden_layer_sizes': (84, 50)},
            n_query=10,
            # Distance to hull to consider a finding as discovery (eV/atom)
            hull_distance=0.2,
            # Fraction to exploit (rest will be explored -- randomly picked)
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20,
        )

        # analysis criterion (need not be exactly same as agent's goal)
        analyzer = AnalyzeStability
        analyzer_params = dict(hull_distance=0.2)

        experiment = OqmdDFTonMC1
        experiment_params = dict(structure_dict=structure_dict,
                                 candidate_data=candidate_data)
        experiment_params['timeout'] = 30000

        finalizer = FinalizeQqmdCampaign
        finalizer_params = dict(hull_distance=0.2)

        n_max_iter = n_max_iter_heuristics(len(candidate_data), 10)

        # Construct and start loop
        new_loop = Loop(
            candidate_data, agent, experiment, analyzer,
            agent_params=agent_params,
            analyzer_params=analyzer_params,
            experiment_params=experiment_params,
            finalizer=finalizer,
            finalizer_params=finalizer_params,
            heuristic_stopper=5,
            s3_prefix=run_prefix)
        new_loop.auto_loop_in_directories(
            n_iterations=n_max_iter, timeout=10, monitor=True,
            initialize=True, with_icsd=True)
    except Exception as e:
        # Record the failure on disk so it is included in the final sync
        message = "{}".format(e)
        trace = traceback.format_exc()
        dumpfn({"error": message, "traceback": trace}, "error.json")
        dumpfn({"status": "error"}, "job_status.json")

    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=run_prefix, sync_path='.')
    return True
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs",
                 n_max_atoms=20, agent=None, analyzer=None,
                 experiment=None, log_file="campaign.log",
                 cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    Writes ``start.json`` and ``candidate_data.pickle`` into the current
    directory and syncs the directory to S3 under ``<prefix>/<chemsys>``.

    Args:
        chemsys (str): chemical system for the campaign, given as
            dash-separated element symbols
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign; when None, an
            AgentStabilityAdaBoost with the defaults below is used
        analyzer (Analyzer): analyzer for stability campaign; when None,
            StabilityAnalyzer(hull_distance=0.2) is used
        experiment: experiment for stability campaign; when None,
            OqmdDFTonMC1(timeout=30000) is used
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from the
            chemical system
    """
    # Logger is instantiated directly (not via logging.getLogger), so the
    # handlers added here belong to this call's logger object only.
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    file_handler = logging.FileHandler(log_file)
    cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                      stream_name=chemsys)
    logger.addHandler(file_handler)
    logger.addHandler(cw_handler)
    logger.addHandler(logging.StreamHandler())

    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    element_list = chemsys.split('-')
    max_coeff, charge_balanced = heuristic_setup(element_list)
    domain = StructureDomain.from_bounds(
        element_list, charge_balanced=charge_balanced,
        n_max_atoms=n_max_atoms, grid=range(1, max_coeff))
    candidate_data = domain.candidates()

    # Dump structure/candidate data
    with open('candidate_data.pickle', 'wb') as f:
        pickle.dump(candidate_data, f)
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
    logger.info("Candidates generated")

    # Set up agents and loop parameters. Defaults are applied only when the
    # argument is None, so a caller-supplied object is never replaced just
    # because it happens to evaluate as falsy.
    if agent is None:
        agent = AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
    if analyzer is None:
        analyzer = StabilityAnalyzer(hull_distance=0.2)
    if experiment is None:
        experiment = OqmdDFTonMC1(timeout=30000)
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Construct and start loop
    return cls(
        candidate_data=candidate_data,
        agent=agent,
        experiment=experiment,
        analyzer=analyzer,
        seed_data=seed_data,
        heuristic_stopper=5,
        s3_prefix=s3_prefix,
        logger=logger)
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs",
                 n_max_atoms=20, agent=None, analyzer=None,
                 experiment=None, log_file="campaign.log",
                 cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    Candidate data is loaded from an S3 cache keyed on ``chemsys`` and
    ``n_max_atoms`` when present; otherwise it is generated and written to
    that cache. Experiments returned by ``experiment.fetch_cached`` are
    passed through the analyzer to extend the seed data and are removed
    from the candidates.

    Args:
        chemsys (str): chemical system for the campaign, given as
            dash-separated element symbols
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign; when None, an
            AgentStabilityAdaBoost with the defaults below is used
        analyzer (Analyzer): analyzer for stability campaign; when None,
            StabilityAnalyzer(hull_distance=0.2) is used
        experiment: experiment for stability campaign; when None,
            OqmdDFTonMC1(timeout=30000, prefix_append="proto-dft") is used
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from the
            chemical system
    """
    # Logger is instantiated directly (not via logging.getLogger), so the
    # handlers added here belong to this call's logger object only.
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    file_handler = logging.FileHandler(log_file)
    cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                      stream_name=chemsys)
    logger.addHandler(file_handler)
    logger.addHandler(cw_handler)
    logger.addHandler(logging.StreamHandler())

    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    # Check cache
    cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
        chemsys, n_max_atoms)
    cache_url = "s3://{}/{}".format(CAMD_S3_BUCKET, cache_key)
    # TODO: create test of isfile
    if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
        logger.info("Found cached protosearch domain.")
        # NOTE(review): unpickling can execute arbitrary code; this assumes
        # the cache bucket is writable only by trusted parties.
        candidate_data = pd.read_pickle(cache_url)
        logger.info("Loaded cached {}.".format(cache_key))
    else:
        logger.info(
            "Generating domain with max {} atoms.".format(n_max_atoms))
        element_list = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(
            element_list, charge_balanced=charge_balanced,
            n_max_atoms=n_max_atoms, grid=range(1, max_coeff))
        candidate_data = domain.candidates()
        logger.info("Candidates generated")
        candidate_data.to_pickle(cache_url)
        logger.info("Cached protosearch domain at {}.".format(cache_key))

    # Dump structure/candidate data
    candidate_data.to_pickle("candidate_data.pickle")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Set up agents and loop parameters. Defaults are applied only when the
    # argument is None, so a caller-supplied object is never replaced just
    # because it happens to evaluate as falsy.
    if agent is None:
        agent = AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
    if analyzer is None:
        analyzer = StabilityAnalyzer(hull_distance=0.2)
    if experiment is None:
        experiment = OqmdDFTonMC1(timeout=30000, prefix_append="proto-dft")
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Load cached experiments
    logger.info("Loading cached experiments")
    cached_experiments = experiment.fetch_cached(candidate_data)
    logger.info("Found {} experiments.".format(len(cached_experiments)))
    # Explicit length check: cached_experiments is indexed like a DataFrame
    # below, and DataFrame truthiness is ambiguous.
    if len(cached_experiments) > 0:
        # Only the updated seed data is needed here; the summary is unused.
        _, seed_data = analyzer.analyze(cached_experiments, seed_data)
        # Remove cached experiments from candidate_data
        candidate_space = candidate_data.index.difference(
            cached_experiments.index, sort=False).tolist()
        candidate_data = candidate_data.loc[candidate_space]
        logger.info("Cached experiments added to seed.")

    # Construct and start loop
    return cls(
        candidate_data=candidate_data,
        agent=agent,
        experiment=experiment,
        analyzer=analyzer,
        seed_data=seed_data,
        heuristic_stopper=5,
        s3_prefix=s3_prefix,
        logger=logger)