Пример #1
0
def run_model(parameters_dict):
    """Assemble a population algorithm from ``parameters_dict`` and execute it.

    The explicit search and IO parameter dictionaries are extracted first.
    The objective function, action spaces, mutation strategy and stop
    criterion are then parsed and handed to ``_build_instance``.

    When the parsed evaluation strategy compares equal to ``"guacamol"``, the
    instance is wrapped in a ``ChemPopAlgGoalDirectedGenerator`` and passed to
    ``assess_goal_directed_generation`` with ``benchmark_version='v2'``.
    Otherwise the instance's ``run()`` method is called directly.

    :param parameters_dict: configuration forwarded to the parsing helpers
    :return: None
    """
    search_params = _extract_explicit_search_parameters(parameters_dict)
    io_params = _extract_explicit_IO_parameters(parameters_dict)

    # Objective function
    evaluation_strategy = _parse_objective_function_strategy(
        parameters_dict, explicit_IO_parameters_dict=io_params)

    # Action space
    spaces, spaces_parameters = _parse_action_space(parameters_dict)

    # Mutation strategy
    mutation_strategy = _parse_mutation_parameters(
        explicit_search_parameters=search_params,
        evaluation_strategy=evaluation_strategy,
        action_spaces=spaces,
        action_spaces_parameters=spaces_parameters)

    # Stop criterion
    stop_criterion_strategy = _parse_stop_criterion_strategy(
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # Population algorithm instance
    pop_alg = _build_instance(
        evaluation_strategy=evaluation_strategy,
        mutation_strategy=mutation_strategy,
        stop_criterion_strategy=stop_criterion_strategy,
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # Regular case: just run the algorithm.
    if not (evaluation_strategy == "guacamol"):
        pop_alg.run()
        return

    # GuacaMol special case: the benchmark drives the algorithm itself.
    top_100_flag = search_params["guacamol_init_top_100"]
    init_population_path = io_params["smiles_list_init_path"]
    output_dir = io_params["model_path"]

    guacamol_generator = ChemPopAlgGoalDirectedGenerator(
        pop_alg=pop_alg,
        guacamol_init_top_100=top_100_flag,
        init_pop_path=init_population_path,
        output_save_path=output_dir)

    results_file = join(output_dir, "output_GuacaMol.json")
    assess_goal_directed_generation(guacamol_generator,
                                    json_output_file=results_file,
                                    benchmark_version='v2')
Пример #2
0
def main():
    """Run the GuacaMol goal-directed benchmark with a ``GB_MCTS_Generator``.

    Command-line options are parsed, dumped to ``goal_directed_params.json``
    in the output directory, and forwarded to the generator. Benchmark
    results go to ``goal_directed_results.json`` in the same directory.

    The default number of generations is derived from the module-level
    ``max_oracle_num`` divided by the default population size and the default
    number of children.
    """
    # Per the original note: each generation costs one oracle call for each
    # molecule in the population.
    default_population = 100
    default_children = 10
    default_generations = int(max_oracle_num / default_population /
                              default_children)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--pickle_directory',
        default=None,
        help='Directory containing pickle files with the distribution statistics')

    # Integer options, registered in the order they appear in --help.
    leading_int_options = (
        ('--seed', 0),
        ('--n_jobs', -1),
        ('--generations', default_generations),
        ('--population_size', default_population),
        ('--num_sims', 40),
        ('--max_children', default_children),
        ('--max_atoms', 60),
    )
    for flag, fallback in leading_int_options:
        parser.add_argument(flag, type=int, default=fallback)

    parser.add_argument('--init_smiles', type=str, default='CC')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--suite', default='v3')
    args = parser.parse_args()

    # Unset directories fall back to the directory containing this script.
    for option in ('output_dir', 'pickle_directory'):
        if getattr(args, option) is None:
            setattr(args, option,
                    os.path.dirname(os.path.realpath(__file__)))

    np.random.seed(args.seed)
    setup_default_logger()

    # Keep a record of the effective command line arguments.
    params_path = os.path.join(args.output_dir, 'goal_directed_params.json')
    with open(params_path, 'w') as params_file:
        json.dump(vars(args), params_file, sort_keys=True, indent=4)

    optimiser = GB_MCTS_Generator(
        pickle_directory=args.pickle_directory,
        n_jobs=args.n_jobs,
        num_sims=args.num_sims,
        max_children=args.max_children,
        init_smiles=args.init_smiles,
        max_atoms=args.max_atoms,
        patience=args.patience,
        generations=args.generations,
        population_size=args.population_size)

    results_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=results_path,
                                    benchmark_version=args.suite)
Пример #3
0
def entry_point():
    """Run the GuacaMol goal-directed benchmark with a ``CREM_Generator``.

    Command-line options are parsed, dumped to ``goal_directed_params.json``
    in the output directory, and forwarded to the generator (always with
    ``random_start=True``). Benchmark results go to
    ``goal_directed_results.json`` in the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', type=str)
    parser.add_argument('--db_fname', type=str)

    # Integer options, registered in the order they appear in --help.
    int_options = (
        ('--selection_size', 10),
        ('--radius', 3),
        ('--replacements', 1000),
        ('--min_size', 0),
        ('--max_size', 10),
        ('--min_inc', -7),
        ('--max_inc', 7),
        ('--generations', 1000),
        ('--ncpu', 1),
        ('--seed', 42),
    )
    for flag, fallback in int_options:
        parser.add_argument(flag, type=int, default=fallback)

    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--suite', default='v2')
    args = parser.parse_args()

    np.random.seed(args.seed)
    setup_default_logger()

    # Default output location: the directory containing this script.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # Keep a record of the effective command line arguments.
    params_path = os.path.join(args.output_dir, 'goal_directed_params.json')
    with open(params_path, 'w') as params_file:
        json.dump(vars(args), params_file, sort_keys=True, indent=4)

    optimiser = CREM_Generator(
        smi_file=args.smiles_file,
        selection_size=args.selection_size,
        db_fname=args.db_fname,
        radius=args.radius,
        min_size=args.min_size,
        max_size=args.max_size,
        min_inc=args.min_inc,
        max_inc=args.max_inc,
        replacements=args.replacements,
        generations=args.generations,
        ncpu=args.ncpu,
        random_start=True,
        output_dir=args.output_dir)

    results_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=results_path,
                                    benchmark_version=args.suite)
Пример #4
0
def main():
    """Run the GuacaMol goal-directed benchmark with a ``PPODirectedGenerator``.

    Command-line options are parsed, dumped to ``goal_directed_params.json``
    in the output directory, and forwarded to the generator. Benchmark
    results go to ``goal_directed_results.json`` in the same directory.

    Defaults resolved at runtime:

    * ``--output_dir``: the directory containing this script.
    * ``--model_path``: ``pretrained_model/model_final_0.473.pt`` next to
      this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--episode_size', type=int, default=8192)
    parser.add_argument('--batch_size', type=int, default=1024)
    parser.add_argument('--entropy_weight', type=int, default=1)
    parser.add_argument('--kl_div_weight', type=int, default=10)
    parser.add_argument('--output_dir', default=None)
    # The default is fractional (0.2), so the option must be parsed as a
    # float; with type=int any fractional value given on the command line
    # (e.g. "--clip_param 0.1") was rejected by argparse.
    parser.add_argument('--clip_param', type=float, default=0.2)
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--model_path', default=None)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--suite', default='v3')

    args = parser.parse_args()

    np.random.seed(args.seed)

    setup_default_logger()

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    if args.model_path is None:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        args.model_path = os.path.join(dir_path, 'pretrained_model',
                                       'model_final_0.473.pt')

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = PPODirectedGenerator(pretrained_model_path=args.model_path,
                                     num_epochs=args.num_epochs,
                                     episode_size=args.episode_size,
                                     batch_size=args.batch_size,
                                     entropy_weight=args.entropy_weight,
                                     kl_div_weight=args.kl_div_weight,
                                     clip_param=args.clip_param)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
def _str_to_bool(text):
    """Convert a command-line string into a boolean.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as ``True``; this converter interprets the text instead.

    :param text: raw option value from the command line
    :return: ``True`` for true/t/yes/y/1, ``False`` for false/f/no/n/0 or the
        empty string (case-insensitive, surrounding whitespace ignored)
    :raises argparse.ArgumentTypeError: if the text is not a recognised value
    """
    normalised = text.strip().lower()
    if normalised in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was under bool('').
    if normalised in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % (text,))


def main():
    """Run the GuacaMol goal-directed benchmark with a ``GB_MCTS_Generator``.

    Command-line options are parsed, dumped to ``goal_directed_params.json``
    in the output directory, and forwarded to the generator. Benchmark
    results go to ``goal_directed_results.json`` in the same directory.

    ``--output_dir`` and ``--pickle_directory`` default to the directory
    containing this script. ``--random_start`` takes an explicit boolean
    value, e.g. ``--random_start True`` or ``--random_start False``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pickle_directory', help='Directory containing pickle files with the distribution statistics',
                        default=None)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--population_size', type=int, default=100)
    parser.add_argument('--num_sims', type=int, default=40)
    parser.add_argument('--max_children', type=int, default=25)
    parser.add_argument('--max_atoms', type=int, default=60)
    parser.add_argument('--init_smiles', type=str, default='CC')
    # type=bool would turn "--random_start False" into True; parse the text.
    parser.add_argument('--random_start', type=_str_to_bool, default=False)
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    args = parser.parse_args()

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    if args.pickle_directory is None:
        args.pickle_directory = os.path.dirname(os.path.realpath(__file__))

    np.random.seed(args.seed)

    setup_default_logger()

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'), 'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = GB_MCTS_Generator(pickle_directory=args.pickle_directory,
                                  n_jobs=args.n_jobs,
                                  random_start=args.random_start,
                                  num_sims=args.num_sims,
                                  max_children=args.max_children,
                                  init_smiles=args.init_smiles,
                                  max_atoms=args.max_atoms,
                                  patience=args.patience,
                                  generations=args.generations,
                                  population_size=args.population_size)

    json_file_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser, json_output_file=json_file_path)
def main():
    """Run the GuacaMol goal-directed benchmark with a ``ChemGEGenerator``.

    Command-line options are parsed, dumped to ``goal_directed_params.json``
    in the output directory, and forwarded to the generator. Benchmark
    results go to ``goal_directed_results.json`` in the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smiles')

    # Integer options, registered in the order they appear in --help.
    int_options = (
        ('--seed', 42),
        ('--population_size', 100),
        ('--n_mutations', 200),
        ('--gene_size', 300),
        ('--generations', 1000),
        ('--n_jobs', -1),
    )
    for flag, fallback in int_options:
        parser.add_argument(flag, type=int, default=fallback)

    parser.add_argument('--random_start', action='store_true')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--suite', default='v1')
    args = parser.parse_args()

    np.random.seed(args.seed)
    setup_default_logger()

    # Default output location: the directory containing this script.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # Keep a record of the effective command line arguments.
    params_path = os.path.join(args.output_dir, 'goal_directed_params.json')
    with open(params_path, 'w') as params_file:
        json.dump(vars(args), params_file, sort_keys=True, indent=4)

    optimiser = ChemGEGenerator(
        smi_file=args.smiles_file,
        population_size=args.population_size,
        n_mutations=args.n_mutations,
        gene_size=args.gene_size,
        generations=args.generations,
        n_jobs=args.n_jobs,
        random_start=args.random_start,
        patience=args.patience)

    results_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=results_path,
                                    benchmark_version=args.suite)
from guacamol.assess_goal_directed_generation import assess_goal_directed_generation
from generative_playground.molecules.guacamol_utils import MyGoalDirectedGenerator

# The same identifier is given to the generator constructor and used as the
# benchmark version for the assessment.
BENCHMARK_SUITE = 'trivial'

my_gen = MyGoalDirectedGenerator(BENCHMARK_SUITE)
assess_goal_directed_generation(
    goal_directed_molecule_generator=my_gen,
    benchmark_version=BENCHMARK_SUITE,
)
Пример #8
0
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--suite', default='v3')
    args = parser.parse_args()

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    if args.model_path is None:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        args.model_path = os.path.join(dir_path, 'pretrained_model',
                                       'model_final_0.473.pt')

    optimizer = SmilesRnnDirectedGenerator(
        pretrained_model_path=args.model_path,
        n_epochs=args.n_epochs,
        mols_to_sample=args.mols_to_sample,
        keep_top=args.keep_top,
        optimize_n_epochs=args.optimize_n_epochs,
        max_len=args.max_len,
        optimize_batch_size=args.optimize_batch_size,
        number_final_samples=args.benchmark_num_samples,
        random_start=args.random_start,
        smi_file=args.smiles_file,
        n_jobs=args.n_jobs)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimizer,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
Пример #9
0
import argparse
import os

from guacamol.assess_goal_directed_generation import assess_goal_directed_generation
from guacamol.utils.helpers import setup_default_logger

from .chembl_file_reader import ChemblFileReader
from .optimizer import BestFromChemblOptimizer

if __name__ == '__main__':
    # Script entry point: benchmark an optimizer that is built from the
    # contents of a SMILES file.
    setup_default_logger()

    parser = argparse.ArgumentParser(
        description='Goal-directed benchmark for best molecules from SMILES file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smiles')
    parser.add_argument('--output_dir', default=None, help='Output directory')
    args = parser.parse_args()

    # Default output location: the directory containing this script.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    optimizer = BestFromChemblOptimizer(
        smiles_reader=ChemblFileReader(args.smiles_file))

    assess_goal_directed_generation(
        optimizer,
        json_output_file=os.path.join(args.output_dir,
                                      'goal_directed_results.json'))
Пример #10
0
def run_model(parameters_dict):
    """Assemble a population algorithm from ``parameters_dict`` and execute it.

    The explicit search and IO parameter dictionaries are extracted first.
    The objective function, action spaces, mutation strategy and stop
    criterion are then parsed and handed to ``_build_instance``.

    When ``is_or_contains_undefined_GuacaMol_evaluation_strategy`` reports a
    GuacaMol strategy, the instance is wrapped in a
    ``ChemPopAlgGoalDirectedGenerator`` and assessed. The benchmark version
    is the second ``"_"``-separated token of the value returned by
    ``get_GuacaMol_benchmark_parameter``. Otherwise the instance's ``run()``
    method is called directly.

    :param parameters_dict: configuration forwarded to the parsing helpers
    :return: the population algorithm instance when it was run directly;
        ``None`` in the GuacaMol case
    """
    search_params = _extract_explicit_search_parameters(parameters_dict)
    io_params = _extract_explicit_IO_parameters(parameters_dict)

    # Objective function
    evaluation_strategy = _parse_objective_function_strategy(
        parameters_dict,
        explicit_IO_parameters_dict=io_params,
        explicit_search_parameters_dict=search_params)

    # Action space
    spaces, spaces_parameters, explicit_space_parameters = \
        _parse_action_space(parameters_dict)

    # Mutation strategy
    mutation_strategy = _parse_mutation_parameters(
        explicit_search_parameters=search_params,
        evaluation_strategy=evaluation_strategy,
        action_spaces=spaces,
        action_spaces_parameters=spaces_parameters,
        search_space_parameters=explicit_space_parameters)

    # Stop criterion
    stop_criterion_strategy = _parse_stop_criterion_strategy(
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # Population algorithm instance
    pop_alg = _build_instance(
        evaluation_strategy=evaluation_strategy,
        mutation_strategy=mutation_strategy,
        stop_criterion_strategy=stop_criterion_strategy,
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # Regular case: run the algorithm and hand the instance back.
    if not is_or_contains_undefined_GuacaMol_evaluation_strategy(
            evaluation_strategy):
        pop_alg.run()
        return pop_alg

    # GuacaMol special case: the benchmark drives the algorithm itself.
    top_100_flag = search_params["guacamol_init_top_100"]
    init_population_path = io_params["smiles_list_init_path"]
    output_dir = io_params["model_path"]

    guacamol_generator = ChemPopAlgGoalDirectedGenerator(
        pop_alg=pop_alg,
        guacamol_init_top_100=top_100_flag,
        init_pop_path=init_population_path,
        output_save_path=output_dir)

    # Select the benchmark set named by the evaluation strategy.
    benchmark_key = get_GuacaMol_benchmark_parameter(
        evaluation_strategy).split("_")[1]

    results_file = join(output_dir, "output_GuacaMol.json")
    assess_goal_directed_generation(guacamol_generator,
                                    json_output_file=results_file,
                                    benchmark_version=benchmark_key)
Пример #11
0
def main():
    """Run the GuacaMol goal-directed benchmark with a ``DeriverGenerator``.

    Steps, in order:

    1. Parse the command-line options.
    2. Delete output files left under ``deriver_goal/`` by a previous run.
    3. Seed NumPy's and the standard library's random number generators.
    4. Configure logging at ``CRITICAL`` level to silence verbose output.
    5. Dump the options to ``goal_directed_params.json`` in the output
       directory (default: the directory containing this script).
    6. Build the generator and write the benchmark results to
       ``goal_directed_results.json`` in the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smi')
    parser.add_argument('--seed', type=int, default=24)
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--suite', default='v2')

    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--population', type=int, default=100)
    parser.add_argument('--selection_size', type=int, default=200)
    parser.add_argument('--selection_method', type=str, default='linear')
    parser.add_argument('--derive_size', type=int, default=100)

    parser.add_argument('--brics_fragment_db',
                        type=str,
                        default='deriver_goal/chembl.db')
    parser.add_argument('--selfies_proportion', type=float, default=0)
    parser.add_argument('--brics_proportion', type=float, default=0)
    parser.add_argument('--selfies_gb_proportion', type=float, default=0)
    parser.add_argument('--smiles_gb_proportion', type=float, default=0)
    parser.add_argument('--mutation_rate', type=float, default=0.5)

    parser.add_argument('--enable_scanner', action='store_true', default=False)
    parser.add_argument('--enable_filter', action='store_true', default=False)
    parser.add_argument('--delayed_filtering', type=float, default=0)

    parser.add_argument('--patience', type=int, default=5)

    parser.add_argument('--temperature', type=float, default=1)
    parser.add_argument('--temp_decay', type=float, default=0.95)
    parser.add_argument('--start_task', type=int, default=0)
    parser.add_argument('--random_start', action='store_true', default=False)

    parser.add_argument('--derive_population',
                        action='store_true',
                        default=False)
    parser.add_argument('--counterscreen', action='store_true', default=False)

    args = parser.parse_args()

    # Remove stale outputs of a previous run. Each file is handled on its own
    # so that one missing file does not leave the remaining ones in place.
    stale_outputs = (
        'deriver_goal/all_output_smiles.smi',
        'deriver_goal/mean_scores_by_generation.txt',
        'deriver_goal/best_scores_by_generation.txt',
        'deriver_goal/worst_scores_by_generation.txt',
        'deriver_goal/selected_output_smiles.smi',
        'deriver_goal/top_100_output_smiles.smi',
    )
    for stale_path in stale_outputs:
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            pass

    np.random.seed(args.seed)
    # Call random.seed; assigning to it would replace the function and leave
    # the generator unseeded.
    random.seed(args.seed)

    # turn off the millions of lines of useless text
    logging.basicConfig(format='%(levelname)s : %(message)s',
                        level=logging.CRITICAL)

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = DeriverGenerator(
        smi_file=args.smiles_file,
        generations=args.generations,
        population=args.population,
        selection_size=args.selection_size,
        selection_method=args.selection_method,
        derive_size=args.derive_size,
        brics_fragment_db=args.brics_fragment_db,
        selfies_proportion=args.selfies_proportion,
        brics_proportion=args.brics_proportion,
        selfies_gb_proportion=args.selfies_gb_proportion,
        smiles_gb_proportion=args.smiles_gb_proportion,
        mutation_rate=args.mutation_rate,
        enable_scanner=args.enable_scanner,
        enable_filter=args.enable_filter,
        delayed_filtering=args.delayed_filtering,
        patience=args.patience,
        temperature=args.temperature,
        temp_decay=args.temp_decay,
        start_task=args.start_task,
        random_start=args.random_start,
        derive_population=args.derive_population,
        counterscreen=args.counterscreen)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
    iterations = 100
    balance = 1.0
    version = "v2"
    accumulate_rewards = False

    json_file = "guacamol/json_results/first_nn_trial.json"

    # neural net
    hidden_size = 256
    num_layers = 2
    biderectional = False
    file = "deep_likeliness/saved_parameters/deep_scorer_network.pth"
    neural_net = RNNEncoder(hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=biderectional)
    neural_net.load_state_dict(torch.load(file))

    model = SynergeticMolGDG(
        neural_network=neural_net,
        mutation_rate=mutation_rate,
        n_walkers=n_walkers,
        balance=balance,
        iterations=iterations,
        accumulate_rewards=accumulate_rewards,
        n_cpu=n_cpu,
    )

    assess_goal_directed_generation(model,
                                    json_output_file=json_file,
                                    benchmark_version=version)
Пример #13
0
                all_smiles.add(smiles)

            if self.best_score < score:
                self.best_score = score
                self.best_smiles = smiles

            return score

        mcts = LanguageModelMCTSWithPUCTTerminating(self.lm,
                                                    width,
                                                    max_depth,
                                                    eval_function,
                                                    cpuct=c,
                                                    terminating_symbol='</s>')
        mcts.search(["<s>"], num_simulations)

        return [self.best_smiles]


if __name__ == '__main__':
    # Script entry point: benchmark the MCTS generator on the 'v2' suite.
    setup_default_logger()

    mcts_generator = ChemgramsMCTSGoalDirectedGenerator()

    # '../models' is a relative path, so it resolves against the current
    # working directory.
    results_path = os.path.join('../models',
                                'goal_directed_learning_results.json')

    assess_goal_directed_generation(
        mcts_generator,
        json_output_file=results_path,
        benchmark_version='v2',
    )