topology_graphs=[stk.cage.FourPlusSix()], size=population_size, random_seed=random_seed, use_cache=True, ) # ##################################################################### # Selector for selecting the next generation. # ##################################################################### generation_selector = stk.Sequence( stk.AboveAverage(duplicate_mols=False), stk.RemoveMolecules( remover=stk.AboveAverage(duplicate_mols=False), selector=stk.Roulette( duplicate_mols=False, random_seed=random_seed, ), ), num_batches=population_size, ) # ##################################################################### # Selector for selecting parents. # ##################################################################### crossover_selector = stk.StochasticUniversalSampling( num_batches=5, batch_size=2, duplicate_batches=False, random_seed=random_seed, )
# Parents for crossover are picked by tournament sampling: ten batches
# of two molecules each, with no repeated batch and no repeated
# molecule.
crossover_selector = stk.Tournament(
    random_seed=random_seed,
    duplicate_mols=False,
    duplicate_batches=False,
    batch_size=2,
    num_batches=10,
)

# #####################################################################
# Selector for selecting molecules for mutation.
# #####################################################################

# Five single-molecule batches are drawn by roulette, never reusing a
# molecule.
mutation_selector = stk.Roulette(
    random_seed=random_seed,
    batch_size=1,
    duplicate_mols=False,
    num_batches=5,
)

# #####################################################################
# Crosser.
# #####################################################################

# The recombination key is the name of the functional-group type of
# the first functional group on a molecule.
crosser = stk.GeneticRecombination(
    random_seed=random_seed,
    key=lambda molecule: molecule.func_groups[0].fg_type.name,
)

# #####################################################################
# Mutator.
# #####################################################################
stk.polymer.Linear('A', 6), stk.polymer.Linear('A', 12) ] population = stk.EAPopulation.init_random(building_blocks=[building_blocks], topology_graphs=topology_graphs, size=25, use_cache=True) # ##################################################################### # Selector for selecting the next generation. # ##################################################################### generation_selector = stk.SelectorSequence( stk.Fittest(num_batches=3, duplicates=False), stk.Roulette(num_batches=22, duplicates=False)) # ##################################################################### # Selector for selecting parents. # ##################################################################### crossover_selector = stk.AboveAverage(num_batches=5, batch_size=2) # ##################################################################### # Selector for selecting molecules for mutation. # ##################################################################### mutation_selector = stk.SelectorFunnel( stk.AboveAverage(num_batches=10, duplicates=False), stk.Roulette(num_batches=5))
stk.MoleculeRecord( topology_graph=get_topology_graph(3), ).with_fitness_value(9), stk.MoleculeRecord( topology_graph=get_topology_graph(4), ).with_fitness_value(2), stk.MoleculeRecord( topology_graph=get_topology_graph(5), ).with_fitness_value(1), stk.MoleculeRecord( topology_graph=get_topology_graph(6), ).with_fitness_value(1), ) @pytest.fixture( scope='session', params=( lambda population: CaseData( selector=stk.Roulette(duplicate_molecules=False, ), population=population, selected=( stk.Batch( records=(population[0], ), fitness_values={population[0]: 10}, key_maker=stk.Inchi(), ), stk.Batch( records=(population[1], ), fitness_values={population[1]: 9}, key_maker=stk.Inchi(), ), stk.Batch( records=(population[2], ), fitness_values={population[2]: 2},
from .case_data import CaseData def get_topology_graph(num_repeating_units): return stk.polymer.Linear( building_blocks=(stk.BuildingBlock('BrCCBr', [stk.BromoFactory()]), ), repeating_unit='A', num_repeating_units=num_repeating_units, ) @pytest.fixture( scope='session', params=(lambda: CaseData( selector=stk.Roulette(num_batches=50), population=( stk.MoleculeRecord(topology_graph=get_topology_graph(2), ).with_fitness_value(1), stk.MoleculeRecord(topology_graph=get_topology_graph(3), ).with_fitness_value(2), stk.MoleculeRecord(topology_graph=get_topology_graph(4), ). with_fitness_value(3), stk.MoleculeRecord(topology_graph=get_topology_graph(5), ).with_fitness_value(4), stk.MoleculeRecord(topology_graph=get_topology_graph(6), ).with_fitness_value(5), stk.MoleculeRecord(topology_graph=get_topology_graph(7), ).with_fitness_value(6), stk.MoleculeRecord(topology_graph=get_topology_graph(8), ).with_fitness_value(7),
def main():
    """
    Run the evolutionary algorithm and write its results.

    The MongoDB URI is read from the ``--mongodb_uri`` command line
    option (default ``mongodb://localhost:27017/``). Building blocks
    are loaded from ``fluoros.txt`` and ``bromos.txt`` located next to
    this file, an initial population is made from them, and the EA is
    run for 50 generations. The molecule of every record of every
    generation is put into the constructed-molecule database.

    Output passed to ``write`` or to the plotters:
    ``initial_{i}.mol``, ``final_{i}.mol``, ``fitness_progress.png``,
    ``fitness_progress.csv`` and ``rotatable_bonds_progress.png``,
    plus whatever the selection plotters produce.

    The MongoDB client is closed before returning, including when an
    error is raised part-way through the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mongodb_uri',
                        help='The MongoDB URI for the database to connect to.',
                        default='mongodb://localhost:27017/')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results. Every component
    # below draws its own seed from this generator, so the ORDER of
    # the generator.randint() calls is part of the reproducibility
    # contract - do not reorder them.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    client = pymongo.MongoClient(args.mongodb_uri)
    try:
        db = stk.ConstructedMoleculeMongoDb(client)
        fitness_db = stk.ValueMongoDb(client, 'fitness_values')

        # Plot selections.
        # NOTE(review): the SelectionPlotter instances are not kept;
        # presumably constructing one attaches it to the selector -
        # confirm against the stk documentation.
        generation_selector = stk.Best(
            num_batches=25,
            duplicate_molecules=False,
        )
        stk.SelectionPlotter('generation_selection', generation_selector)

        mutation_selector = stk.Roulette(
            num_batches=5,
            random_seed=generator.randint(0, 1000),
        )
        stk.SelectionPlotter('mutation_selection', mutation_selector)

        crossover_selector = stk.Roulette(
            num_batches=3,
            batch_size=2,
            random_seed=generator.randint(0, 1000),
        )
        stk.SelectionPlotter('crossover_selection', crossover_selector)

        # The same database is used to look up and to store the
        # property vectors.
        fitness_calculator = stk.PropertyVector(
            property_functions=(
                get_num_rotatable_bonds,
                get_complexity,
                get_num_bad_rings,
            ),
            input_database=fitness_db,
            output_database=fitness_db,
        )

        fitness_normalizer = stk.NormalizerSequence(
            fitness_normalizers=(
                # Prevent a division by 0 error in DivideByMean, by
                # ensuring the value of each property is at least 1.
                stk.Add((1, 1, 1)),
                stk.DivideByMean(),
                # Obviously, because all coefficients are equal, the
                # Multiply normalizer does not need to be here.
                # However, it's here to show that you can easily
                # change the relative importance of each component of
                # the fitness value, by changing the values of the
                # coefficients.
                stk.Multiply((1, 1, 1)),
                stk.Sum(),
                stk.Power(-1),
            ),
        )

        ea = stk.EvolutionaryAlgorithm(
            num_processes=1,
            initial_population=initial_population,
            fitness_calculator=fitness_calculator,
            mutator=stk.RandomMutator(
                mutators=(
                    stk.RandomBuildingBlock(
                        building_blocks=fluoros,
                        is_replaceable=is_fluoro,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.SimilarBuildingBlock(
                        building_blocks=fluoros,
                        is_replaceable=is_fluoro,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.RandomBuildingBlock(
                        building_blocks=bromos,
                        is_replaceable=is_bromo,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.SimilarBuildingBlock(
                        building_blocks=bromos,
                        is_replaceable=is_bromo,
                        random_seed=generator.randint(0, 1000),
                    ),
                ),
                random_seed=generator.randint(0, 1000),
            ),
            crosser=stk.GeneticRecombination(
                get_gene=get_functional_group_type,
            ),
            generation_selector=generation_selector,
            mutation_selector=mutation_selector,
            crossover_selector=crossover_selector,
            fitness_normalizer=fitness_normalizer,
        )

        logger.info('Starting EA.')

        generations = []
        for generation in ea.get_generations(50):
            for record in generation.get_molecule_records():
                db.put(record.get_molecule())
            generations.append(generation)

        # Write the final population. The last generation is taken
        # from the list explicitly rather than from the loop variable
        # left over by the loop above.
        final_generation = generations[-1]
        for i, record in enumerate(final_generation.get_molecule_records()):
            write(record.get_molecule(), f'final_{i}.mol')

        logger.info('Making fitness plot.')

        # Normalize the fitness values across the entire EA before
        # plotting the fitness values. This still uses the fitness
        # calculator, and therefore the database, so it has to happen
        # before the client is closed.
        generations = tuple(
            normalize_generations(
                fitness_calculator=fitness_calculator,
                fitness_normalizer=fitness_normalizer,
                generations=generations,
            ))

        fitness_progress = stk.ProgressPlotter(
            generations=generations,
            get_property=lambda record: record.get_fitness_value(),
            y_label='Fitness Value',
        )
        fitness_progress.write('fitness_progress.png')
        fitness_progress.get_plot_data().to_csv('fitness_progress.csv')

        logger.info('Making rotatable bonds plot.')

        rotatable_bonds_progress = stk.ProgressPlotter(
            generations=generations,
            get_property=lambda record: get_num_rotatable_bonds(
                record.get_molecule()
            ),
            y_label='Number of Rotatable Bonds',
        )
        rotatable_bonds_progress.write('rotatable_bonds_progress.png')
    finally:
        # Release the database connection even if the run failed.
        client.close()
def main():
    """
    Run the evolutionary algorithm and write its results.

    The MongoDB URI is read from the ``--mongodb_uri`` command line
    option (default ``mongodb://localhost:27017/``). Building blocks
    are loaded from ``fluoros.txt`` and ``bromos.txt`` located next to
    this file, an initial population is made from them, and the EA is
    run for 50 generations. The molecule of every record of every
    generation is put into the constructed-molecule database.

    Output passed to ``write`` or to the plotters:
    ``initial_{i}.mol``, ``final_{i}.mol``, ``fitness_progress.png``
    and ``rotatable_bonds_progress.png``.

    The MongoDB client is closed before returning, including when an
    error is raised part-way through the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mongodb_uri',
        help='The MongoDB URI for the database to connect to.',
        default='mongodb://localhost:27017/',
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results. Every component
    # below draws its own seed from this generator, so the ORDER of
    # the generator.randint() calls is part of the reproducibility
    # contract - do not reorder them.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    client = pymongo.MongoClient(args.mongodb_uri)
    try:
        db = stk.ConstructedMoleculeMongoDb(client)

        ea = stk.EvolutionaryAlgorithm(
            initial_population=initial_population,
            fitness_calculator=stk.FitnessFunction(get_fitness_value),
            mutator=stk.RandomMutator(
                mutators=(
                    stk.RandomBuildingBlock(
                        building_blocks=fluoros,
                        is_replaceable=is_fluoro,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.SimilarBuildingBlock(
                        building_blocks=fluoros,
                        is_replaceable=is_fluoro,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.RandomBuildingBlock(
                        building_blocks=bromos,
                        is_replaceable=is_bromo,
                        random_seed=generator.randint(0, 1000),
                    ),
                    stk.SimilarBuildingBlock(
                        building_blocks=bromos,
                        is_replaceable=is_bromo,
                        random_seed=generator.randint(0, 1000),
                    ),
                ),
                random_seed=generator.randint(0, 1000),
            ),
            crosser=stk.GeneticRecombination(
                get_gene=get_functional_group_type,
            ),
            generation_selector=stk.Best(
                num_batches=25,
                duplicate_molecules=False,
            ),
            mutation_selector=stk.Roulette(
                num_batches=5,
                random_seed=generator.randint(0, 1000),
            ),
            crossover_selector=stk.Roulette(
                num_batches=3,
                batch_size=2,
                random_seed=generator.randint(0, 1000),
            ),
        )

        logger.info('Starting EA.')

        generations = []
        for generation in ea.get_generations(50):
            for record in generation.get_molecule_records():
                db.put(record.get_molecule())
            generations.append(generation)

        # Write the final population. The last generation is taken
        # from the list explicitly rather than from the loop variable
        # left over by the loop above.
        final_generation = generations[-1]
        for i, record in enumerate(final_generation.get_molecule_records()):
            write(record.get_molecule(), f'final_{i}.mol')

        logger.info('Making fitness plot.')

        fitness_progress = stk.ProgressPlotter(
            generations=generations,
            get_property=lambda record: record.get_fitness_value(),
            y_label='Fitness Value',
        )
        fitness_progress.write('fitness_progress.png')

        logger.info('Making rotatable bonds plot.')

        # NOTE(review): the fitness plot above hands each molecule
        # record to get_property, so get_num_rotatable_bonds
        # presumably has to accept a record (not a bare molecule)
        # here - confirm against its definition.
        rotatable_bonds_progress = stk.ProgressPlotter(
            generations=generations,
            get_property=get_num_rotatable_bonds,
            y_label='Number of Rotatable Bonds',
        )
        rotatable_bonds_progress.write('rotatable_bonds_progress.png')
    finally:
        # Release the database connection even if the run failed.
        client.close()