def test_invalid_action(self):
    formula = Formula('H2CO')
    env = MolecularEnvironment(reward=self.reward,
                               observation_space=self.observation_space,
                               action_space=self.action_space,
                               formulas=[formula])
    # He is not in the bag, so placing it is an invalid action
    action = self.action_space.from_atom(Atom(symbol='He', position=(0, 1, 0)))
    with self.assertRaises(KeyError):
        env.step(action)
def test_addition(self):
    formula = string_to_formula('H2CO')
    env = MolecularEnvironment(reward=self.reward,
                               observation_space=self.observation_space,
                               action_space=self.action_space,
                               formulas=[formula])
    action = self.action_space.from_atom(Atom(symbol='H', position=(0.0, 1.0, 0.0)))
    obs, reward, done, info = env.step(action=action)
    atoms1, formula = self.observation_space.parse(obs)
    self.assertEqual(atoms1[0].symbol, 'H')
    # Remaining bag as (atomic number, count) pairs: one of the two H atoms has been
    # placed, so one H (Z=1), one C (Z=6), and one O (Z=8) are left.
    self.assertEqual(formula, ((0, 0), (1, 1), (6, 1), (7, 0), (8, 1)))
    self.assertEqual(reward, 0.0)
    self.assertFalse(done)
def test_refilling(self):
    formula = Formula('H2O')
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[formula],
        max_h_distance=1.0,
        bag_refills=5,
        initial_formula=Formula('H2O'),
    )
    action = self.action_space.from_atom(atom=Atom(symbol='H', position=(1.0, 0, 0)))
    obs, reward, done, info = env.step(action=action)
    self.assertFalse(done)
def test_invalid_formula(self):
    formula = Formula('He2')
    with self.assertRaises(ValueError):
        MolecularEnvironment(reward=self.reward,
                             observation_space=self.observation_space,
                             action_space=self.action_space,
                             formulas=[formula])
def test_h_distance(self):
    formula = Formula('H2CO')
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[formula],
        max_h_distance=1.0,
    )

    # The first H can be placed on its own
    action = self.action_space.from_atom(atom=Atom(symbol='H', position=(0, 0, 0)))
    obs, reward, done, info = env.step(action=action)
    self.assertFalse(done)

    # A second H placed 1.5 away exceeds max_h_distance=1.0, so the episode terminates
    action = self.action_space.from_atom(atom=Atom(symbol='H', position=(0, 1.5, 0)))
    obs, reward, done, info = env.step(action=action)
    self.assertTrue(done)
def test_addition(self):
    formula = Formula('H2CO')
    env = MolecularEnvironment(reward=self.reward,
                               observation_space=self.observation_space,
                               action_space=self.action_space,
                               formulas=[formula])
    action = self.action_space.from_atom(Atom(symbol='H', position=(0.0, 1.0, 0.0)))
    obs, reward, done, info = env.step(action=action)
    atoms1, f1 = self.observation_space.parse(obs)
    self.assertEqual(atoms1[0].symbol, 'H')
    # One of the two H atoms has been placed, so one H, one C, and one O remain in the bag
    self.assertDictEqual(f1.count(), {'H': 1, 'C': 1, 'O': 1, 'N': 0, 'X': 0})
    self.assertEqual(reward, 0.0)
    self.assertFalse(done)
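# Note: the tests above use self.reward, self.observation_space, and self.action_space,
# but the fixture that creates them is not part of this excerpt. A minimal sketch of the
# assumed setUp follows; the class name, the rho value, the canvas size, and the symbol
# list are illustrative assumptions, not the repository's actual configuration.
class MolecularEnvironmentTest(unittest.TestCase):  # hypothetical class name
    def setUp(self):
        self.reward = InteractionReward(0.01)  # rho: assumed value
        self.action_space = ActionSpace()
        # Symbols chosen to match those asserted in test_addition ('X', 'H', 'C', 'N', 'O')
        self.observation_space = ObservationSpace(canvas_size=25,
                                                  symbols=['X', 'H', 'C', 'N', 'O'])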
def main() -> None:
    config = get_config()

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], symbols=bag_symbols)

    model = load_specific_model(model_path=config['loaded_model_name'])
    model.action_space = action_space
    model.observation_space = observation_space

    reward = InteractionReward(config['rho'])

    # Fall back to the training formulas if no evaluation formulas are given
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']
    eval_formulas = parse_formulas(config['eval_formulas'])
    eval_init_formulas = parse_formulas(config['eval_formulas'])

    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    eval_buffer_size = 1000
    eval_buffer = PPOBuffer(int_act_dim=model.internal_action_dim,
                            size=eval_buffer_size,
                            gamma=config['discount'],
                            lam=config['lam'])

    # Roll out one evaluation episode without gradient tracking
    with torch.no_grad():
        model.training = False
        rollout_info = rollout(model, eval_env, eval_buffer, num_episodes=1)
        model.training = True
    logging.info('Evaluation rollout: ' + str(rollout_info))

    # Parse the last observation in the buffer and save the resulting structure
    atoms, _ = eval_env.observation_space.parse(eval_buffer.next_obs_buf[eval_buffer.ptr - 1])
    print(atoms)
    io.write('/home/energy/s153999/evaluated_structure.traj', atoms)
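# For reference, the saved trajectory can be inspected afterwards with ASE's io module;
# a minimal sketch, reusing the hard-coded path from above:
#
#     from ase import io
#     atoms = io.read('/home/energy/s153999/evaluated_structure.traj')
#     print(atoms.get_chemical_formula())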
def main() -> None:
    util.set_one_thread()
    # torch.set_num_threads(24)
    config = get_config()
    util.create_directories([config['log_dir'], config['model_dir'], config['data_dir'],
                             config['results_dir'], config['structures_dir']])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)
    util.set_seeds(seed=config['seed'] + mpi.get_proc_rank())

    model_handler = util.ModelIO(directory=config['model_dir'], tag=tag)

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], symbols=bag_symbols)

    start_num_steps = 0
    if config['loaded_model_name']:
        model = load_specific_model(model_path=config['loaded_model_name'])
        model.action_space = action_space
        model.observation_space = observation_space
    elif not config['load_model']:
        model = build_model(config, observation_space=observation_space, action_space=action_space)
    else:
        model, start_num_steps = model_handler.load()
        model.action_space = action_space
        model.observation_space = observation_space

    mpi.sync_params(model)

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward(config['rho'])

    # Evaluation formulas default to the training formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = parse_formulas(config['formulas'])
    eval_formulas = parse_formulas(config['eval_formulas'])
    train_init_formulas = parse_formulas(config['formulas'])
    eval_init_formulas = parse_formulas(config['eval_formulas'])
    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    # Number of episodes during evaluation
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=train_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=train_init_formulas,
        bag_refills=config['bag_refills'],
    )

    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    rollout_saver = RolloutSaver(directory=config['data_dir'], tag=tag, all_ranks=config['all_ranks'])
    info_saver = InfoSaver(directory=config['results_dir'], tag=tag)
    structure_saver = StructureSaver(directory=config['structures_dir'], tag=tag)

    ppo(
        env=env,
        eval_env=eval_env,
        ac=model,
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        clip_ratio=config['clip_ratio'],
        learning_rate=config['learning_rate'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=rollout_saver,
        save_train_rollout=config['save_rollouts'] in ('train', 'all'),
        save_eval_rollout=config['save_rollouts'] in ('eval', 'all'),
        info_saver=info_saver,
        structure_saver=structure_saver,
    )
def main() -> None:
    config = get_config()
    util.create_directories([config['log_dir'], config['model_dir'], config['data_dir'],
                             config['results_dir']])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)
    util.set_seeds(seed=config['seed'])
    device = util.init_device(config['device'])

    zs = [ase.data.atomic_numbers[s] for s in config['symbols'].split(',')]
    action_space = ActionSpace(zs=zs)
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], zs=zs)

    # Evaluation formulas default to the training formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = util.split_formula_strings(config['formulas'])
    eval_formulas = util.split_formula_strings(config['eval_formulas'])
    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    model_handler = ModelIO(directory=config['model_dir'], tag=tag, keep=config['keep_models'])
    if config['load_latest']:
        model, start_num_steps = model_handler.load_latest(device=device)
        model.action_space = action_space
        model.observation_space = observation_space
    elif config['load_model'] is not None:
        model, start_num_steps = model_handler.load(device=device, path=config['load_model'])
        model.action_space = action_space
        model.observation_space = observation_space
    else:
        model = build_model(config,
                            observation_space=observation_space,
                            action_space=action_space,
                            device=device)
        start_num_steps = 0

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward()

    # Number of episodes during evaluation
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    training_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in train_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        ) for _ in range(config['num_envs'])
    ])

    eval_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in eval_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        )
    ])

    batch_ppo(
        envs=training_envs,
        eval_envs=eval_envs,
        ac=model,
        optimizer=util.get_optimizer(name=config['optimizer'],
                                     learning_rate=config['learning_rate'],
                                     parameters=model.parameters()),
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        mini_batch_size=config['mini_batch_size'],
        clip_ratio=config['clip_ratio'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=util.RolloutSaver(directory=config['data_dir'], tag=tag),
        save_train_rollout=config['save_rollouts'] in ('train', 'all'),
        save_eval_rollout=config['save_rollouts'] in ('eval', 'all'),
        info_saver=util.InfoSaver(directory=config['results_dir'], tag=tag),
        device=device,
    )
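# Assumed entry point (not shown in this excerpt): each of these scripts would normally
# end with the standard guard so that main() runs when the module is executed directly.
if __name__ == '__main__':
    main()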