def process_run():
    """
    Summarize the campaign run located in the current working directory.

    Loads the seed data and the discovered unique structures, runs the
    stability analysis on them, and augments the result with DFT data
    requested from the synthesis-discovery API and with the output of
    get_structure_data for each structure.

    Returns:
        (pandas.DataFrame): summary with one row per discovered unique
            structure, or None when the required seed data is missing

    """
    folder = os.getcwd()
    if os.path.isfile("error.json"):
        error = loadfn("error.json")
        print("{} ERROR: {}".format(folder, error))

    required_files = ['seed_data.pickle']
    if not all(os.path.isfile(fn) for fn in required_files):
        # Nothing to summarize; report which folder was skipped
        print("{} ERROR: no seed data, no analysis to be done".format(folder))
        return None

    analyzer = StabilityAnalyzer(hull_distance=0.2, parallel=True)
    # NOTE: pickle must only be used on trusted, locally produced files
    with open("seed_data.pickle", "rb") as f:
        result_df = pickle.load(f)

    unique_structures = loadfn("discovered_unique_structures.json")
    all_result_ids = list(unique_structures.keys())

    summary = result_df.loc[all_result_ids]
    summary = summary[['Composition', 'delta_e']]
    analyzer.analyze(result_df, all_result_ids=all_result_ids,
                     new_result_ids=all_result_ids)

    # Add stabilities
    summary['stabilities'] = pd.Series(analyzer.stabilities)

    # The chemical system is taken from the name of the run folder
    chemsys = os.path.split(folder)[-1]

    # Get all DFT data
    response = requests.get('{}/synthesis-discovery/{}/dft-results'.format(
        API_URL, chemsys))
    data = json.loads(response.content.decode('utf-8'))
    data = pd.DataFrame(data)

    # Merge the per-result dictionaries into a single mapping
    aggregated = {}
    for result in data['dft_results']:
        aggregated.update(result)
    simulation_data = pd.DataFrame.from_dict(aggregated, orient='index')
    summary['bandgap'] = simulation_data['bandgap']

    # Apply garcia correction
    summary['bandgap_garcia_exp'] = 1.358 * summary['bandgap'] + 0.904
    summary['structure'] = pd.Series(unique_structures)
    summary['chemsys'] = [
        '-'.join(sorted(Composition(comp).as_dict().keys()))
        for comp in summary['Composition']
    ]

    # Add structure data
    symmetry_data = {
        key: get_structure_data(structure)
        for key, structure in unique_structures.items()
    }
    symmetry_df = pd.DataFrame.from_dict(symmetry_data, orient='index')
    summary = pd.concat([summary, symmetry_df], axis=1)
    summary['url'] = simulation_data['url']
    return summary
def test_gp_bagging(self):
    """Smoke-test a campaign driven by the bagged GP stability agent."""
    full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Restrict the data to entries with at most three species
    df_sub = full_df[full_df['N_species'] <= 3]
    seed_size = 200  # Starting sample size

    bagged_agent = BaggedGaussianProcessStabilityAgent(
        n_query=10,
        hull_distance=0.05,
        alpha=0.5,  # Fraction of std to include in expected improvement
        n_estimators=2,
        max_samples=195,
        parallel=False
    )
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05,
                                           parallel=False)
    sampler = ATFSampler(df_sub)

    campaign = Campaign(df_sub, bagged_agent, sampler, stability_analyzer,
                        create_seed=seed_size)
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    # Passing means six automatic iterations completed without raising
    campaign.auto_loop(6)
    self.assertTrue(True)
def test_sync(self):
    """Check that auto_loop with saved iterations writes state to s3."""
    with ScratchDir('.'):
        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

        # Construct and start campaign
        campaign = Campaign(frame, AgentStabilityML5(), ATFSampler(frame),
                            StabilityAnalyzer(), create_seed=10,
                            s3_prefix="test")
        campaign.auto_loop(n_iterations=3, save_iterations=True,
                           initialize=True)

        # Test iteration read
        s3 = boto3.resource('s3')
        latest = s3.Object(CAMD_S3_BUCKET, "test/iteration.json")
        self.assertEqual(json.loads(latest.get()['Body'].read()), 2)

        # Test save directories
        for iteration in range(-1, 3):
            saved = s3.Object(CAMD_S3_BUCKET,
                              f"test/{iteration}/iteration.json")
            stored_value = json.loads(saved.get()['Body'].read())
            self.assertEqual(stored_value, iteration)
def test_initialize_and_update(self):
    """Reserve a meta-agent campaign, then reload and update its pool."""
    campaign_name = "test_meta_agent"
    agent_pool = ParameterTable(TEST_AGENT_PARAMS)
    dataframe = get_oqmd_data_by_chemsys("Fe-O")
    cand, seed = partition_intercomp(dataframe, n_elements=1)
    analyzer = StabilityAnalyzer()
    experiment = LocalAgentSimulation(
        cand, iterations=5,
        analyzer=analyzer, seed_data=seed
    )

    MetaAgentCampaign.reserve(
        name=campaign_name,
        experiment=experiment,
        agent_pool=agent_pool,
        analyzer=analyzer
    )
    # Reserving the same name a second time must be rejected
    with self.assertRaises(ValueError):
        MetaAgentCampaign.reserve(campaign_name, dataframe, agent_pool, None)

    agent_pool, data, analyzer = MetaAgentCampaign.load_pickled_objects(
        campaign_name)
    self.assertEqual(len(agent_pool), 12)

    # Updating with the same parameters should leave the pool size as is
    MetaAgentCampaign.update_agent_pool(campaign_name, TEST_AGENT_PARAMS)
    agent_pool, _, _ = MetaAgentCampaign.load_pickled_objects(campaign_name)
    self.assertEqual(len(agent_pool), 12)
def test_mp_loop(self):
    """Run a random-agent campaign on the analysis test set and resume it."""
    df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df_analysis.csv'))
    # Strip the "mp-"/"mvc-" prefixes so the ids become integers
    numeric_ids = []
    for mp_id in df['id']:
        stripped = mp_id.replace("mp-", "").replace('mvc-', '')
        numeric_ids.append(int(stripped))
    df['id'] = numeric_ids
    # NOTE(review): the result of set_index is discarded, so df keeps its
    # original index - confirm whether that is intended
    df.set_index("id")
    df['Composition'] = df['formula']

    # Just use the Ti-O-N chemsys
    seed = df.iloc[:38]
    candidates = df.iloc[38:209]
    agent = RandomAgent(n_query=20)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=df)
    campaign = Campaign(candidates, agent, experiment, analyzer,
                        seed_data=seed)
    campaign.initialize()

    for iteration in range(6):
        campaign.run()
        self.assertTrue(os.path.isfile("hull.png"))
        if iteration >= 1:
            self.assertTrue(os.path.isfile("history.pickle"))

    # Testing the continuation
    resumed = Campaign(df, agent, experiment, analyzer)
    self.assertTrue(resumed.initialized)
    self.assertEqual(resumed.iteration, 6)
    self.assertEqual(resumed.loop_state, None)

    resumed.run()
    self.assertTrue(True)
    self.assertEqual(resumed.iteration, 7)
def test_simulated(self):
    """
    Run a simulated ProtoDFTCampaign with the AdaBoost stability agent
    and check that the finalized hull plot is written.
    """
    exp_dataframe = pd.read_pickle(
        os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle"))
    experiment = ATFSampler(exp_dataframe)
    # Candidates are the experiment frame without its last 11 columns
    candidate_data = exp_dataframe.iloc[:, :-11]

    # Set up agents and loop parameters
    agent = AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=2,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = StabilityAnalyzer(hull_distance=0.2)

    # Reduce seed_data
    icsd_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
    seed_data = filter_dataframe_by_composition(icsd_data, "MnNiOSb")
    leftover = ~icsd_data.index.isin(seed_data.index)
    # Add some random other data to test compositional flexibility.
    # pd.concat is used because DataFrame.append was removed in pandas 2.0
    seed_data = pd.concat(
        [seed_data, icsd_data.loc[leftover].sample(30)])
    del icsd_data

    with ScratchDir('.'):
        campaign = ProtoDFTCampaign(candidate_data=candidate_data,
                                    agent=agent,
                                    experiment=experiment,
                                    analyzer=analyzer,
                                    seed_data=seed_data,
                                    heuristic_stopper=5)
        campaign.autorun()
        self.assertTrue(os.path.isfile('hull_finalized.png'))
def test_random_agent_loop(self):
    """Run a random-agent campaign for six steps, then resume it."""
    atf_data = load_default_atf_data()
    seed_size = 200  # Starting sample size
    agent = RandomAgent(n_query=10)
    analyzer = StabilityAnalyzer(hull_distance=0.05, parallel=False)
    experiment = ATFSampler(dataframe=atf_data)

    first_campaign = Campaign(atf_data, agent, experiment, analyzer,
                              create_seed=seed_size)
    first_campaign.initialize()
    self.assertFalse(first_campaign.create_seed)

    for _ in range(6):
        first_campaign.run()
        self.assertTrue(True)

    # Testing the continuation
    resumed = Campaign(atf_data, agent, experiment, analyzer,
                       create_seed=seed_size)
    self.assertTrue(resumed.initialized)
    self.assertEqual(resumed.iteration, 6)
    self.assertEqual(resumed.loop_state, None)

    resumed.run()
    self.assertTrue(True)
    self.assertEqual(resumed.iteration, 7)
def from_chemsys(cls, chemsys):
    """
    Class factory method for constructing campaign from chemsys.

    Writes a start.json marker and syncs the working directory to s3,
    generates the candidate structure domain for the chemical system,
    dumps it to candidate_data.pickle and builds the campaign with the
    standard agent, analyzer, experiment and seed data.

    Args:
        chemsys (str): chemical system for the campaign, with elements
            separated by dashes

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from
            the chemical system

    """
    s3_prefix = "proto-dft-2/runs/{}".format(chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    element_list = chemsys.split('-')
    max_coeff, charge_balanced = heuristic_setup(element_list)
    domain = StructureDomain.from_bounds(
        element_list,
        charge_balanced=charge_balanced,
        n_max_atoms=20,
        grid=range(1, max_coeff))
    candidate_data = domain.candidates()

    # Dump structure/candidate data
    with open('candidate_data.pickle', 'wb') as f:
        pickle.dump(candidate_data, f)

    # Set up agents and loop parameters
    agent = AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=10,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = StabilityAnalyzer(hull_distance=0.2)
    experiment = OqmdDFTonMC1(timeout=30000)
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Construct and start loop. The campaign must use the same prefix
    # that start.json was synced to above, not a second hard-coded one.
    return cls(candidate_data=candidate_data,
               agent=agent,
               experiment=experiment,
               analyzer=analyzer,
               seed_data=seed_data,
               heuristic_stopper=5,
               s3_prefix=s3_prefix)
def update_run(folder):
    """
    Updates an existing run folder to include hull and report plots.

    For each iteration directory that exists together with its
    predecessor, the hull is re-plotted to hull_<iteration>.png and the
    campaign report plot is regenerated afterwards.

    Args:
        folder (str): path of the run folder to update

    Returns:
        None

    """
    required_files = ["seed_data.pickle", "report.log"]
    with cd(folder):
        if os.path.isfile("error.json"):
            error = loadfn("error.json")
            print("{} ERROR: {}".format(folder, error))

        if not all(os.path.isfile(fn) for fn in required_files):
            print("{} ERROR: no seed data, no analysis to be done".format(
                folder))
        else:
            analyzer = StabilityAnalyzer(hull_distance=0.2)

            # Generate report plots
            for iteration in range(0, 25):
                print("{}: {}".format(folder, iteration))
                previous = str(iteration - 1)
                # Both this iteration and the previous one must exist,
                # because the result ids are read from the previous one
                if not os.path.isdir(str(iteration)) \
                        or not os.path.isdir(previous):
                    continue
                # NOTE: pickle must only be used on trusted files
                with open(os.path.join(str(iteration), "seed_data.pickle"),
                          "rb") as f:
                    result_df = pickle.load(f)
                all_result_ids = loadfn(
                    os.path.join(previous, "consumed_candidates.json"))
                new_result_ids = loadfn(
                    os.path.join(previous,
                                 "submitted_experiment_requests.json"))
                analyzer.plot_hull(
                    df=result_df,
                    new_result_ids=new_result_ids,
                    all_result_ids=all_result_ids,
                    filename="hull_{}.png".format(iteration),
                    finalize=False)

            Campaign.generate_report_plot()
def test_run(self):
    """Smoke-test submitting and monitoring a local agent simulation."""
    with ScratchDir('.'):
        fe_o_data = get_oqmd_data_by_chemsys("Fe-O")
        candidates, seed = partition_intercomp(fe_o_data, n_elements=1)
        agent_table = pd.DataFrame({"agent": [RandomAgent()]})
        simulation = LocalAgentSimulation(
            candidates,
            iterations=5,
            seed_data=seed,
            analyzer=StabilityAnalyzer())
        simulation.submit(agent_table)
        simulation.monitor()
        # The results are only fetched, not inspected
        fetched = simulation.get_results()
        self.assertTrue(True)
def test_analyze(self):
    """Check stabilities computed by two successive analyze calls."""
    csv_path = os.path.join(CAMD_TEST_FILES, "test_df_analysis.csv")
    df = pd.read_csv(csv_path, index_col="id")
    df['Composition'] = df['formula']
    analyzer = StabilityAnalyzer(hull_distance=0.1)

    seed_data = filter_dataframe_by_composition(df, "TiNO")
    # TODO: resolve drop_duplicates filtering mp data
    seed_data = seed_data.drop_duplicates(keep='last').dropna()

    # Hold two entries back to act as the new experimental results
    held_out_ids = ["mp-30998", "mp-572822"]
    held_out = seed_data.loc[held_out_ids]
    seed_data = seed_data.drop(index=held_out_ids)

    # First pass: analyze the remaining data against an empty seed
    _, seed_data = analyzer.analyze(
        new_experimental_results=seed_data,
        seed_data=pd.DataFrame(),
    )
    # Second pass: add the held-out entries on top of that seed
    _, new_seed = analyzer.analyze(
        new_experimental_results=held_out,
        seed_data=seed_data
    )

    self.assertAlmostEqual(new_seed.loc['mp-30998', 'stability'], 0)
    self.assertAlmostEqual(new_seed.loc["mp-572822", 'stability'],
                           0.52784795)
    self.assertTrue(new_seed.loc['mp-30998', 'is_stable'])
    self.assertFalse(new_seed.loc["mp-572822", 'is_stable'])
def test_svgp_loop(self):
    """Smoke-test a campaign driven by the SVGP stability agent."""
    full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Restrict the data to entries with at most three species
    df_sub = full_df[full_df['N_species'] <= 3]
    seed_size = 200  # Starting sample size

    svgp_agent = SVGProcessStabilityAgent(
        n_query=10,
        hull_distance=0.05,
        alpha=0.5,
        M=100
    )
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05,
                                           parallel=False)
    sampler = ATFSampler(df_sub)

    campaign = Campaign(df_sub, svgp_agent, sampler, stability_analyzer,
                        create_seed=seed_size)
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    # Passing means three automatic iterations completed without raising
    campaign.auto_loop(3)
    self.assertTrue(True)
def test_sync(self):
    """Check that initializing a campaign writes iteration 0 to s3."""
    with ScratchDir('.'):
        frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))

        # Construct and start campaign
        campaign = Campaign(frame, AgentStabilityML5(), ATFSampler(frame),
                            StabilityAnalyzer(), create_seed=10,
                            s3_prefix="test")
        campaign.initialize()

        # Read the iteration counter back from the bucket
        s3 = boto3.resource('s3')
        remote = s3.Object(CAMD_S3_BUCKET, "test/iteration.json")
        stored_value = json.loads(remote.get()['Body'].read())
        self.assertEqual(stored_value, 0)
def test_qbc_agent_loop(self):
    """Smoke-test a campaign driven by the QBC stability agent."""
    full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Restrict the data to entries with at most three species
    df_sub = full_df[full_df['N_species'] <= 3]
    seed_size = 200  # Starting sample size

    regressor = MLPRegressor(hidden_layer_sizes=(84, 50))
    qbc_agent = QBCStabilityAgent(
        model=regressor,
        n_query=10,
        hull_distance=0.05,
        alpha=0.5
    )
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05,
                                           parallel=False)
    sampler = ATFSampler(dataframe=df_sub)

    campaign = Campaign(df_sub, qbc_agent, sampler, stability_analyzer,
                        create_seed=seed_size)
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    # Passing means three automatic iterations completed without raising
    campaign.auto_loop(3)
    self.assertTrue(True)
def test_random_agent_loop(self):
    """Run six random-agent iterations on the featurized OQMD frame."""
    oqmd_frame = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")
    seed_size = 5000
    random_agent = RandomAgent(n_query=200)
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05,
                                           parallel=False)
    sampler = ATFSampler(dataframe=oqmd_frame)

    campaign = Campaign(oqmd_frame, random_agent, sampler,
                        stability_analyzer, create_seed=seed_size)
    campaign.initialize()
    self.assertFalse(campaign.create_seed)

    for _ in range(6):
        campaign.run()
        self.assertTrue(True)
def test_simple_gp_loop(self):
    """Smoke-test a campaign driven by the plain GP stability agent."""
    full_df = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    # Restrict the data to entries with at most three species
    df_sub = full_df[full_df['N_species'] <= 3]
    seed_size = 200  # Starting sample size
    # This many new candidates are "calculated with DFT"
    # (i.e. requested from Oracle -- DFT)
    n_query = 10

    gp_agent = GaussianProcessStabilityAgent(
        n_query=n_query,
        hull_distance=0.05,
        alpha=0.5,
        parallel=False
    )
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05,
                                           parallel=False)
    sampler = ATFSampler(dataframe=df_sub)

    campaign = Campaign(df_sub, gp_agent, sampler, stability_analyzer,
                        create_seed=seed_size)
    campaign.initialize()
    self.assertTrue(campaign.initialized)

    # Passing means two automatic iterations completed without raising
    campaign.auto_loop(2)
    self.assertTrue(True)
def test_simulated(self):
    """Smoke-test a simulated ProtoDFTCampaign with a random agent."""
    pickle_path = os.path.join(CAMD_TEST_FILES, "mn-ni-o-sb.pickle")
    exp_dataframe = pd.read_pickle(pickle_path)
    sampler = ATFSampler(exp_dataframe)
    # Candidates are the experiment frame without its last 11 columns
    candidates = exp_dataframe.iloc[:, :-11]
    random_agent = RandomAgent(n_query=2)
    stability_analyzer = StabilityAnalyzer(hull_distance=0.2)

    # Reduce seed_data
    seed = filter_dataframe_by_composition(
        load_dataframe("oqmd1.2_exp_based_entries_featurized_v2"),
        "MnNiOSb")

    with ScratchDir('.'):
        campaign = ProtoDFTCampaign(
            candidate_data=candidates,
            agent=random_agent,
            experiment=sampler,
            analyzer=stability_analyzer,
            seed_data=seed,
            heuristic_stopper=5)
        campaign.autorun()
def from_chemsys(cls, chemsys):
    """
    Class factory method building a random-agent campaign on the
    test dataframe.

    Args:
        chemsys (str): chemical system, used here only to build
            the s3 prefix

    Returns:
        campaign instance constructed from the test data

    """
    atf_frame = pd.read_csv(os.path.join(CAMD_TEST_FILES, 'test_df.csv'))
    seed_size = 200  # Starting sample size
    # This many new candidates are "calculated with DFT"
    # (i.e. requested from Oracle -- DFT)
    batch_size = 10

    random_agent = RandomAgent(n_query=batch_size)
    stability_analyzer = StabilityAnalyzer(hull_distance=0.05)
    sampler = ATFSampler(dataframe=atf_frame)
    return cls(atf_frame, random_agent, sampler, stability_analyzer,
               create_seed=seed_size,
               s3_prefix="oqmd-atf/runs/{}".format(chemsys))
def test_run(self):
    """Smoke-test a reserved meta-agent campaign run end to end."""
    campaign_name = "test_meta_agent"
    agent_pool = ParameterTable(RANDOM_TEST_AGENT_PARAMS)

    # Construct experiment
    fe_o_data = get_oqmd_data_by_chemsys("Fe-O")
    candidates, seed = partition_intercomp(fe_o_data, n_elements=1)
    experiment = LocalAgentSimulation(
        atf_candidate_data=candidates,
        seed_data=seed,
        analyzer=StabilityAnalyzer(),
        iterations=10,
    )
    campaign_analyzer = StabilityCampaignAnalyzer(
        checkpoint_indices=[2, 5, 10])
    MetaAgentCampaign.reserve(
        name=campaign_name,
        experiment=experiment,
        agent_pool=agent_pool,
        analyzer=campaign_analyzer
    )

    with ScratchDir('.'):
        print("Testing meta agent")
        meta_campaign = MetaAgentCampaign.from_reserved_name(
            campaign_name,
            meta_agent=RandomAgent(n_query=1),
        )
        meta_campaign.autorun()
    self.assertTrue(True)
def test_plot_hull(self):
    """Check that plot_hull writes hull.png for a 2D and a 3D system."""
    csv_path = os.path.join(CAMD_TEST_FILES, "test_df_analysis.csv")
    df = pd.read_csv(csv_path, index_col="id")
    df['Composition'] = df['formula']
    plot_name = "hull.png"

    # Test 2D
    with ScratchDir('.'):
        analyzer = StabilityAnalyzer(hull_distance=0.1)
        ti_o = filter_dataframe_by_composition(df, "TiO")
        analyzer.plot_hull(
            ti_o,
            new_result_ids=["mp-685151", "mp-755875"],
            filename=plot_name)
        self.assertTrue(os.path.isfile(plot_name))

    # Test 3D, reusing the analyzer with a tighter hull distance
    with ScratchDir('.'):
        analyzer.hull_distance = 0.05
        ti_n_o = filter_dataframe_by_composition(df, "TiNO")
        analyzer.plot_hull(
            ti_n_o,
            new_result_ids=["mp-776280", "mp-30998"],
            filename=plot_name)
        self.assertTrue(os.path.isfile(plot_name))
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs", n_max_atoms=20,
                 agent=None, analyzer=None, experiment=None,
                 log_file="campaign.log",
                 cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    Sets up a logger writing to a file, to CloudWatch and to the
    stream handler, syncs a start marker and the generated candidate
    data to s3, and builds the campaign, falling back to the standard
    agent, analyzer and experiment when none are supplied.

    Args:
        chemsys (str): chemical system for the campaign
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign
        analyzer (Analyzer): analyzer for stability campaign
        experiment (Agent): experiment for stability campaign
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from
            the chemical system

    """
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    handlers = (
        logging.FileHandler(log_file),
        CloudWatchLogHandler(log_group=cloudwatch_group,
                             stream_name=chemsys),
        logging.StreamHandler(),
    )
    for handler in handlers:
        logger.addHandler(handler)
    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    start_record = {
        "started": datetime.now().isoformat(),
        "version": __version__
    }
    dumpfn(start_record, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    elements = chemsys.split('-')
    max_coeff, charge_balanced = heuristic_setup(elements)
    domain = StructureDomain.from_bounds(
        elements,
        charge_balanced=charge_balanced,
        n_max_atoms=n_max_atoms,
        grid=range(1, max_coeff))
    candidate_data = domain.candidates()

    # Dump structure/candidate data
    with open('candidate_data.pickle', 'wb') as handle:
        pickle.dump(candidate_data, handle)
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
    logger.info("Candidates generated")

    # Set up agents and loop parameters
    if not agent:
        agent = AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
    if not analyzer:
        analyzer = StabilityAnalyzer(hull_distance=0.2)
    if not experiment:
        experiment = OqmdDFTonMC1(timeout=30000)
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Construct and start loop
    return cls(
        candidate_data=candidate_data,
        agent=agent,
        experiment=experiment,
        analyzer=analyzer,
        seed_data=seed_data,
        heuristic_stopper=5,
        s3_prefix=s3_prefix,
        logger=logger)
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs", n_max_atoms=20,
                 agent=None, analyzer=None, experiment=None,
                 log_file="campaign.log",
                 cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    The candidate domain is read from an s3 cache when one exists for
    this chemsys and atom limit, and is generated and cached otherwise.
    Experiments already cached for the candidates are analyzed into the
    seed data and removed from the candidate set.

    Args:
        chemsys (str): chemical system for the campaign
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign
        analyzer (Analyzer): analyzer for stability campaign
        experiment (Agent): experiment for stability campaign
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from
            the chemical system

    """
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    handlers = (
        logging.FileHandler(log_file),
        CloudWatchLogHandler(log_group=cloudwatch_group,
                             stream_name=chemsys),
        logging.StreamHandler(),
    )
    for handler in handlers:
        logger.addHandler(handler)
    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    start_record = {
        "started": datetime.now().isoformat(),
        "version": __version__
    }
    dumpfn(start_record, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    # Check cache
    cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
        chemsys, n_max_atoms)
    cache_uri = "s3://{}/{}".format(CAMD_S3_BUCKET, cache_key)
    # TODO: create test of isfile
    if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
        logger.info("Found cached protosearch domain.")
        candidate_data = pd.read_pickle(cache_uri)
        logger.info("Loaded cached {}.".format(cache_key))
    else:
        logger.info(
            "Generating domain with max {} atoms.".format(n_max_atoms))
        elements = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(elements)
        domain = StructureDomain.from_bounds(
            elements,
            charge_balanced=charge_balanced,
            n_max_atoms=n_max_atoms,
            grid=range(1, max_coeff))
        candidate_data = domain.candidates()
        logger.info("Candidates generated")
        candidate_data.to_pickle(cache_uri)
        logger.info("Cached protosearch domain at {}.".format(cache_key))

    # Dump structure/candidate data
    candidate_data.to_pickle("candidate_data.pickle")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Set up agents and loop parameters
    if not agent:
        agent = AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
    if not analyzer:
        analyzer = StabilityAnalyzer(hull_distance=0.2)
    if not experiment:
        experiment = OqmdDFTonMC1(timeout=30000,
                                  prefix_append="proto-dft")
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Load cached experiments
    logger.info("Loading cached experiments")
    cached_experiments = experiment.fetch_cached(candidate_data)
    logger.info("Found {} experiments.".format(len(cached_experiments)))
    if len(cached_experiments) > 0:
        # Only the updated seed data is kept from the analysis
        _, seed_data = analyzer.analyze(cached_experiments, seed_data)
        # Remove cached experiments from candidate_data
        remaining_ids = candidate_data.index.difference(
            cached_experiments.index, sort=False).tolist()
        candidate_data = candidate_data.loc[remaining_ids]
        logger.info("Cached experiments added to seed.")

    # Construct and start loop
    return cls(
        candidate_data=candidate_data,
        agent=agent,
        experiment=experiment,
        analyzer=analyzer,
        seed_data=seed_data,
        heuristic_stopper=5,
        s3_prefix=s3_prefix,
        logger=logger)
from sklearn.neural_network import MLPRegressor

from camd.agent.stability import AgentStabilityML5
from camd.analysis import StabilityAnalyzer
# Campaign is used below but was never imported.
# NOTE(review): module path assumed to be camd.campaigns.base - confirm
from camd.campaigns.base import Campaign
from camd.experiment.base import ATFSampler
from camd.utils.data import load_default_atf_data

##########################################################
# Load the default ATF dataset
##########################################################
df = load_default_atf_data()

## Epsilon-Greedy
# Starting sample size - a seed of this size will be randomly chosen.
n_seed = 5000
# This many new candidates are "calculated with DFT"
# (i.e. requested from Oracle -- DFT)
n_query = 200
agent = AgentStabilityML5(
    model=MLPRegressor(hidden_layer_sizes=(84, 50)),
    n_query=n_query,
    hull_distance=0.05,
    exploit_fraction=0.5
)
analyzer = StabilityAnalyzer(hull_distance=0.05)
experiment = ATFSampler(dataframe=df)
candidate_data = df
##########################################################
new_loop = Campaign(candidate_data, agent, experiment, analyzer,
                    create_seed=n_seed)
new_loop.auto_loop(n_iterations=4, initialize=True)