def _get_case_data(mongo_client):
    """
    Build the :class:`.CaseData` used to check fitness value caching.

    Parameters
    ----------
    mongo_client : :class:`pymongo.MongoClient`
        The mongo client the database should connect to.

    Returns
    -------
    :class:`.CaseData`
        Holds the fitness calculator, the molecule it was applied to
        and the fitness value obtained from that first calculation.

    """

    # _counter.get_count hands back a different "fitness value" on
    # every call. The test later asks the fitness calculator for the
    # fitness value of the same molecule again: with working caching
    # the number stored here comes back, without caching a new number
    # is produced and the comparison fails.
    cache = stk.ValueMongoDb(
        mongo_client=mongo_client,
        collection='test_caching',
        database='_stk_pytest_database',
    )
    # The same database is used for both looking up and storing values.
    calculator = stk.PropertyVector(
        property_functions=(_counter.get_count, ),
        input_database=cache,
        output_database=cache,
    )
    building_block = stk.BuildingBlock('BrCCBr')

    return CaseData(
        fitness_calculator=calculator,
        molecule=building_block,
        fitness_value=calculator.get_fitness_value(building_block),
    )
scores = [] for bb in mol.get_building_blocks(): rdkit_mol = bb.to_rdkit_mol() rdkit_mol.UpdatePropertyCache() rdkit.GetSymmSSSR(rdkit_mol) rdkit_mol.GetRingInfo() scores.append(scscorer(rdkit_mol)[1]) return sum(scores) # Defines the synthetic accessibility function to use. synthetic_accesibility_func = scscore cage_fitness_calculator = stk.PropertyVector( pore_diameter, largest_window, window_std, synthetic_accesibility_func, ) fitness_calculator = stk.If( condition=lambda mol: failed_optimizer.is_in_cache(mol), true_calculator=stk.FitnessFunction(lambda mol: None), false_calculator=cage_fitness_calculator, ) # ##################################################################### # Fitness normalizer. # ##################################################################### def valid_fitness(population, mol):
pw_mol = pywindow.Molecule.load_rdkit_mol(mol.to_rdkit_mol()) mol.pore_diameter = abs(pw_mol.calculate_pore_diameter() - 5) return mol.pore_diameter def window_std(mol): pw_mol = pywindow.Molecule.load_rdkit_mol(mol.to_rdkit_mol()) windows = pw_mol.calculate_windows() mol.window_std = None if windows is not None and len(windows) > 3: mol.window_std = np.std(windows) return mol.window_std fitness_calculator = stk.PropertyVector( pore_diameter, window_std, ) def valid_fitness(population, mol): return None not in population.get_fitness_values()[mol] fitness_normalizer = stk.Sequence( stk.Power([1, -1], filter=valid_fitness), stk.DivideByMean(filter=valid_fitness), stk.Multiply([1.0, 1.0], filter=valid_fitness), stk.Sum(filter=valid_fitness), stk.ReplaceFitness( replacement_fn=lambda population: min(
# ##################################################################### # Optimizer. # ##################################################################### optimizer = stk.NullOptimizer(use_cache=True) # ##################################################################### # Fitness calculator. # ##################################################################### def num_atoms(mol): return len(mol.atoms) fitness_calculator = stk.PropertyVector(num_atoms) # ##################################################################### # Fitness normalizer. # ##################################################################### # The PropertyVector fitness calculator will set the fitness as # [n_atoms] use the Sum() fitness normalizer to convert the fitness to # just n_atoms^0.5. The sqrt is because we use the Power normalizer. fitness_normalizer = stk.NormalizerSequence(stk.Power(0.5), stk.Sum()) # ##################################################################### # Exit condition. # ##################################################################### terminator = stk.NumGenerations(25)
import numpy as np import pytest import stk from ..case_data import CaseData @pytest.fixture( scope='session', params=(lambda: CaseData( fitness_calculator=stk.PropertyVector(property_functions=( stk.Molecule.get_num_atoms, stk.Molecule.get_num_bonds, stk.Molecule.get_maximum_diameter, ), ), molecule=stk.BuildingBlock('BrCCBr').with_position_matrix( position_matrix=np.array([ [0, 0, 0], [10, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ], dtype=np.float64), ), fitness_value=(8, 7, 10), ), ), )
def main():
    """
    Run the evolutionary algorithm example from the command line.

    The steps performed are:

    1. Parse the ``--mongodb_uri`` command line argument.
    2. Load the fluoro and bromo building blocks from ``fluoros.txt``
       and ``bromos.txt``, which are looked up next to this file.
    3. Build the initial population and write each member to
       ``initial_<i>.mol``.
    4. Set up the databases, selectors, fitness calculator and
       fitness normalizer, and run the evolutionary algorithm for 50
       generations, putting every molecule into the database.
    5. Write the members of the last generation to ``final_<i>.mol``.
    6. Re-normalize the fitness values across all generations and
       write the fitness and rotatable bond progress plots.

    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--mongodb_uri',
                        help='The MongoDB URI for the database to connect to.',
                        default='mongodb://localhost:27017/')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a fixed random seed to get reproducible results. Every
    # component below draws its own seed from this generator, so the
    # order of the generator.randint() calls decides which seed each
    # component receives - do not reorder them.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    client = pymongo.MongoClient(args.mongodb_uri)
    # db receives the molecules of every generation, fitness_db is
    # used as both the input and the output database of the fitness
    # calculator.
    db = stk.ConstructedMoleculeMongoDb(client)
    fitness_db = stk.ValueMongoDb(client, 'fitness_values')

    # Plot selections.
    # NOTE(review): the SelectionPlotter instances are not kept;
    # presumably they attach themselves to the selector when they are
    # constructed - confirm against the stk documentation.
    generation_selector = stk.Best(
        num_batches=25,
        duplicate_molecules=False,
    )
    stk.SelectionPlotter('generation_selection', generation_selector)

    mutation_selector = stk.Roulette(
        num_batches=5,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('mutation_selection', mutation_selector)

    crossover_selector = stk.Roulette(
        num_batches=3,
        batch_size=2,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('crossover_selection', crossover_selector)

    fitness_calculator = stk.PropertyVector(
        property_functions=(
            get_num_rotatable_bonds,
            get_complexity,
            get_num_bad_rings,
        ),
        input_database=fitness_db,
        output_database=fitness_db,
    )

    fitness_normalizer = stk.NormalizerSequence(
        fitness_normalizers=(
            # Prevent a division by 0 error in DivideByMean, by
            # ensuring that the value of each property is at least 1.
            stk.Add((1, 1, 1)),
            stk.DivideByMean(),
            # Obviously, because all coefficients are equal, the
            # Multiply normalizer does not need to be here. However,
            # it's here to show that you can easily change the relative
            # importance of each component of the fitness value, by
            # changing the values of the coefficients.
            stk.Multiply((1, 1, 1)),
            stk.Sum(),
            stk.Power(-1),
        ),
    )

    ea = stk.EvolutionaryAlgorithm(
        num_processes=1,
        initial_population=initial_population,
        fitness_calculator=fitness_calculator,
        mutator=stk.RandomMutator(
            mutators=(
                stk.RandomBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.RandomBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
            ),
            random_seed=generator.randint(0, 1000),
        ),
        crosser=stk.GeneticRecombination(get_gene=get_functional_group_type, ),
        generation_selector=generation_selector,
        mutation_selector=mutation_selector,
        crossover_selector=crossover_selector,
        fitness_normalizer=fitness_normalizer,
    )

    logger.info('Starting EA.')

    # Keep every generation so that the progress plots below can be
    # made, and put each molecule into the database as it is produced.
    generations = []
    for generation in ea.get_generations(50):
        for record in generation.get_molecule_records():
            db.put(record.get_molecule())
        generations.append(generation)

    # Write the final population. At this point `generation` still
    # refers to the last generation yielded by the loop above.
    for i, record in enumerate(generation.get_molecule_records()):
        write(record.get_molecule(), f'final_{i}.mol')

    logger.info('Making fitness plot.')

    # Normalize the fitness values across the entire EA before
    # plotting the fitness values.
    generations = tuple(
        normalize_generations(
            fitness_calculator=fitness_calculator,
            fitness_normalizer=fitness_normalizer,
            generations=generations,
        ))

    fitness_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: record.get_fitness_value(),
        y_label='Fitness Value',
    )
    fitness_progress.write('fitness_progress.png')
    fitness_progress.get_plot_data().to_csv('fitness_progress.csv')

    logger.info('Making rotatable bonds plot.')

    rotatable_bonds_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: get_num_rotatable_bonds(record.
                                                            get_molecule()),
        y_label='Number of Rotatable Bonds',
    )
    rotatable_bonds_progress.write('rotatable_bonds_progress.png')