Exemplo n.º 1
0
def main() -> None:
    config = get_config()

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'],
                                         symbols=bag_symbols)

    model = load_specific_model(model_path=config['loaded_model_name'])
    model.action_space = action_space
    model.observation_space = observation_space

    reward = InteractionReward(config['rho'])

    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    eval_formulas = parse_formulas(config['eval_formulas'])

    eval_init_formulas = parse_formulas(config['eval_formulas'])

    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    eval_buffer_size = 1000
    eval_buffer = PPOBuffer(int_act_dim=model.internal_action_dim,
                            size=eval_buffer_size,
                            gamma=config['discount'],
                            lam=config['lam'])

    with torch.no_grad():
        model.training = False
        rollout_info = rollout(model, eval_env, eval_buffer, num_episodes=1)
        model.training = True
        logging.info('Evaluation rollout: ' + str(rollout_info))

        atoms, _ = eval_env.observation_space.parse(
            eval_buffer.next_obs_buf[eval_buffer.ptr - 1])
        print(atoms)
        io.write('/home/energy/s153999/evaluated_structure.traj', atoms)
Exemplo n.º 2
0
class TestReward(TestCase):
    RESOURCES = pkg_resources.resource_filename(__package__, RESOURCES_FOLDER)

    def setUp(self):
        self.reward = InteractionReward()

    def test_calculation(self):
        reward, info = self.reward.calculate(Atoms(), Atom('H'))
        self.assertEqual(reward, 0)

    def test_h2(self):
        atom1 = Atom('H', position=(0, 0, 0))
        atom2 = Atom('H', position=(1, 0, 0))

        atoms = Atoms()
        atoms.append(atom1)

        reward, info = self.reward.calculate(atoms, atom2)

        self.assertAlmostEqual(reward, 0.1696435)

    def test_addition(self):
        atom1 = Atom('H', position=(0, 0, 0))
        atom2 = Atom('H', position=(1, 0, 0))
        atom3 = Atom('H', position=(2, 0, 0))

        atoms = Atoms()
        atoms.append(atom1)

        reward1, _ = self.reward.calculate(atoms, atom2)
        atoms.append(atom2)

        reward2, _ = self.reward.calculate(atoms, atom3)
        atoms.append(atom3)

        self.assertAlmostEqual(reward1 + reward2, 0.2141968)
Exemplo n.º 3
0
 def setUp(self):
     self.reward = InteractionReward()
Exemplo n.º 4
0
 def setUp(self):
     self.reward = InteractionReward()
     self.symbols = ['H', 'C', 'N', 'O']
     self.observation_space = ObservationSpace(canvas_size=5,
                                               symbols=self.symbols)
     self.action_space = ActionSpace()
Exemplo n.º 5
0
 def setUp(self):
     self.reward = InteractionReward()
     self.zs = [0, 1, 6, 7, 8]
     self.observation_space = ObservationSpace(canvas_size=5, zs=self.zs)
     self.action_space = ActionSpace(zs=self.zs)
Exemplo n.º 6
0
def main() -> None:
    util.set_one_thread()
    # torch.set_num_threads(24)

    config = get_config()

    util.create_directories([config['log_dir'], config['model_dir'], config['data_dir'],
                             config['results_dir'], config['structures_dir']])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)

    util.set_seeds(seed=config['seed'] + mpi.get_proc_rank())

    model_handler = util.ModelIO(directory=config['model_dir'], tag=tag)

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], symbols=bag_symbols)

    start_num_steps = 0

    if config['loaded_model_name']:
        model = load_specific_model(model_path=config['loaded_model_name'])
        model.action_space = action_space
        model.observation_space = observation_space
    else:
        if not config['load_model']:
            model = build_model(config, observation_space=observation_space, action_space=action_space)
        else:
            model, start_num_steps = model_handler.load()
            model.action_space = action_space
            model.observation_space = observation_space

    mpi.sync_params(model)

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward(config['rho'])

    # Evaluation formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = parse_formulas(config['formulas'])
    eval_formulas = parse_formulas(config['eval_formulas'])

    train_init_formulas = parse_formulas(config['formulas'])
    eval_init_formulas = parse_formulas(config['eval_formulas'])

    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    # Number of episodes during evaluation
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=train_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=train_init_formulas,
        bag_refills=config['bag_refills'],
    )

    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    rollout_saver = RolloutSaver(directory=config['data_dir'], tag=tag, all_ranks=config['all_ranks'])
    info_saver = InfoSaver(directory=config['results_dir'], tag=tag)
    image_saver = StructureSaver(directory=config['structures_dir'], tag=tag)

    ppo(
        env=env,
        eval_env=eval_env,
        ac=model,
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        clip_ratio=config['clip_ratio'],
        learning_rate=config['learning_rate'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=rollout_saver,
        save_train_rollout=config['save_rollouts'] == 'train' or config['save_rollouts'] == 'all',
        save_eval_rollout=config['save_rollouts'] == 'eval' or config['save_rollouts'] == 'all',
        info_saver=info_saver,
        structure_saver=image_saver,
    )
Exemplo n.º 7
0
def main() -> None:
    config = get_config()

    util.create_directories([config['log_dir'], config['model_dir'], config['data_dir'], config['results_dir']])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)

    util.set_seeds(seed=config['seed'])
    device = util.init_device(config['device'])

    zs = [ase.data.atomic_numbers[s] for s in config['symbols'].split(',')]
    action_space = ActionSpace(zs=zs)
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], zs=zs)

    # Evaluation formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = util.split_formula_strings(config['formulas'])
    eval_formulas = util.split_formula_strings(config['eval_formulas'])

    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    model_handler = ModelIO(directory=config['model_dir'], tag=tag, keep=config['keep_models'])

    if config['load_latest']:
        model, start_num_steps = model_handler.load_latest(device=device)
        model.action_space = action_space
        model.observation_space = observation_space
    elif config['load_model'] is not None:
        model, start_num_steps = model_handler.load(device=device, path=config['load_model'])
        model.action_space = action_space
        model.observation_space = observation_space
    else:
        model = build_model(config, observation_space=observation_space, action_space=action_space, device=device)
        start_num_steps = 0

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward()

    # Number of episodes during evaluation
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    training_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in train_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        ) for _ in range(config['num_envs'])
    ])

    eval_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in eval_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        )
    ])

    batch_ppo(
        envs=training_envs,
        eval_envs=eval_envs,
        ac=model,
        optimizer=util.get_optimizer(name=config['optimizer'],
                                     learning_rate=config['learning_rate'],
                                     parameters=model.parameters()),
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        mini_batch_size=config['mini_batch_size'],
        clip_ratio=config['clip_ratio'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=util.RolloutSaver(directory=config['data_dir'], tag=tag),
        save_train_rollout=config['save_rollouts'] == 'train' or config['save_rollouts'] == 'all',
        save_eval_rollout=config['save_rollouts'] == 'eval' or config['save_rollouts'] == 'all',
        info_saver=util.InfoSaver(directory=config['results_dir'], tag=tag),
        device=device,
    )