示例#1
0
def test_put_caching(mongo_client):
    """
    Test the cache statistics recorded by repeated ``put`` calls.

    Putting the same polymer twice is expected to register one cache
    miss and one cache hit. Putting the polymer again with a zeroed
    position matrix is expected to register a second miss.

    """

    database_name = '_test_put_caching'
    # Drop any database of the same name before the test starts.
    mongo_client.drop_database(database_name)

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
    )
    building_block = stk.BuildingBlock('BrCCCBr', [stk.BromoFactory()])
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, ),
        repeating_unit='A',
        num_repeating_units=3,
    )
    polymer = stk.ConstructedMolecule(topology_graph=topology_graph)

    # Two identical puts: the first is a miss, the second a hit.
    for _ in range(2):
        database.put(polymer)

    info_after_repeat = database._put.cache_info()
    assert info_after_repeat.hits == 1
    assert info_after_repeat.misses == 1

    # Same polymer, but with every atom placed at the origin.
    zeroed_positions = np.zeros((polymer.get_num_atoms(), 3))
    moved_polymer = polymer.with_position_matrix(
        position_matrix=zeroed_positions,
    )
    database.put(molecule=moved_polymer)

    info_after_move = database._put.cache_info()
    assert info_after_move.hits == 1
    assert info_after_move.misses == 2
示例#2
0
def add_constructed_molecules(
    client,
    database,
    key_makers,
):
    """
    Store linear dimers and atom counts in a MongoDB database.

    Building blocks are taken pairwise from two ``get_molecules(200, 5)``
    calls. Each pair is joined into an ``AB`` linear polymer with one
    repeating unit. The polymer is stored in the constructed molecule
    database, and the atom counts of the polymer and of the first
    building block of the pair are stored in the ``numAtoms``
    collection.

    Parameters
    ----------
    client
        Passed as the ``mongo_client`` of both databases.

    database
        Passed as the ``database`` of both databases.

    key_makers
        Passed to the constructed molecule jsonizer and to the value
        database.

    """

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=key_makers)
    molecule_db = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=jsonizer,
    )
    atom_count_db = stk.ValueMongoDb(
        mongo_client=client,
        collection='numAtoms',
        database=database,
        key_makers=key_makers,
    )

    first_blocks = get_molecules(200, 5)
    second_blocks = get_molecules(200, 5)
    for first, second in zip(first_blocks, second_blocks):
        topology_graph = stk.polymer.Linear(
            building_blocks=(first, second),
            repeating_unit='AB',
            num_repeating_units=1,
        )
        dimer = stk.ConstructedMolecule(topology_graph=topology_graph)
        molecule_db.put(dimer)
        atom_count_db.put(dimer, dimer.get_num_atoms())
        # Only the first building block of each pair gets a value.
        atom_count_db.put(first, first.get_num_atoms())
示例#3
0
def test_get_caching(mongo_client):
    """
    Test the cache statistics recorded by repeated ``get`` calls.

    A polymer is stored and then retrieved twice using its InChIKey.
    The first retrieval is expected to be a cache miss and the second
    a cache hit.

    """

    database_name = '_test_get_caching'
    # Drop any database of the same name before the test starts.
    mongo_client.drop_database(database_name)

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
    )
    building_block = stk.BuildingBlock('BrCCCBr', [stk.BromoFactory()])
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, ),
        repeating_unit='A',
        num_repeating_units=3,
    )
    polymer = stk.ConstructedMolecule(topology_graph=topology_graph)
    database.put(polymer)

    def make_query():
        # A fresh query dict is built for every lookup.
        return {
            stk.InchiKey().get_key_name(): stk.InchiKey().get_key(polymer),
        }

    for _ in range(2):
        database.get(make_query())

    info = database._get.cache_info()
    assert info.hits == 1
    assert info.misses == 1
示例#4
0
def test_get_all():
    """
    Test iteration over all molecules.

    Two constructed molecules are stored in a freshly dropped database.
    Every molecule yielded by ``get_all()`` is matched to the stored
    molecule with the same InChI and compared against it, after both
    are given canonical atom ordering. Finally, the number of yielded
    molecules is checked against the number stored.

    """

    database_name = '_test_get_entries_constructed_molecule'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    key_maker = stk.Inchi()
    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=(key_maker, ))

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        jsonizer=jsonizer,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    molecules = [
        stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
            building_blocks=(stk.BuildingBlock(
                smiles='BrCCCBr', functional_groups=[stk.BromoFactory()]), ),
            repeating_unit='A',
            num_repeating_units=3,
        ), ),
        stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
            building_blocks=(
                stk.BuildingBlock(smiles='BrCCBr',
                                  functional_groups=[stk.BromoFactory()]),
                stk.BuildingBlock(smiles='BrCNCBr',
                                  functional_groups=[stk.BromoFactory()]),
            ),
            repeating_unit='AB',
            num_repeating_units=2,
        ), ),
    ]
    molecules_by_key = {
        key_maker.get_key(molecule): molecule
        for molecule in molecules
    }

    for molecule in molecules:
        database.put(molecule)

    # Count explicitly rather than relying on the loop variable of
    # enumerate(), which would be unbound (NameError) if the database
    # yielded nothing, hiding the real failure.
    num_retrieved = 0
    for retrieved in database.get_all():
        key = key_maker.get_key(retrieved)
        molecule = molecules_by_key[key]
        is_equivalent_constructed_molecule(
            molecule.with_canonical_atom_ordering(),
            retrieved.with_canonical_atom_ordering(),
        )
        num_retrieved += 1

    # Check number of molecules.
    assert num_retrieved == len(molecules)
示例#5
0
def get_database(
    database_name: str,
    mongo_client: pymongo.MongoClient,
    key_makers: tuple[stk.MoleculeKeyMaker, ...],
    indices: tuple[str, ...],
) -> stk.ConstructedMoleculeMongoDb:
    """
    Create a constructed molecule database with zero-sized caches.

    Parameters:

        database_name:
            Passed as the ``database`` of the created database.

        mongo_client:
            The client the database uses.

        key_makers:
            Passed to the :class:`.ConstructedMoleculeJsonizer` used
            by the database.

        indices:
            Passed as the ``indices`` of the created database.

    Returns:

        The database, with both LRU cache sizes set to ``0``.

    """

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers)
    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        jsonizer=jsonizer,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        indices=indices,
    )
    return database
示例#6
0
        The key used to retrieve the :attr:`.molecule` from the
        database.

    """

    get_database: abc.Callable[[pymongo.MongoClient], stk.ValueMongoDb]
    molecule: stk.Molecule
    key: object


@pytest.fixture(
    params=(
        lambda: CaseDataData(
            get_database=lambda mongo_client: (stk.ConstructedMoleculeMongoDb(
                mongo_client=mongo_client,
                database='_stk_test_database_for_testing',
                put_lru_cache_size=0,
                get_lru_cache_size=0,
            )),
            molecule=stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
                building_blocks=(stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ), ),
                repeating_unit='A',
                num_repeating_units=2,
            ), ),
            key={
                'InChIKey':
                rdkit.MolToInchiKey(rdkit.MolFromSmiles(SMILES='BrCCCCBr')),
            },
        ),
import pytest
import stk
import rdkit.Chem.AllChem as rdkit

from ...utilities import MockMongoClient
from ..case_data import CaseData


@pytest.fixture(
    params=(
        CaseData(
            database=stk.ConstructedMoleculeMongoDb(
                mongo_client=MockMongoClient(),
                lru_cache_size=0,
            ),
            molecule=stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
                building_blocks=(stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ), ),
                repeating_unit='A',
                num_repeating_units=2,
            ), ),
            key={
                'InChIKey':
                rdkit.MolToInchiKey(rdkit.MolFromSmiles(SMILES='BrCCCCBr')),
            },
        ),
        CaseData(
            database=stk.ConstructedMoleculeMongoDb(
                mongo_client=MockMongoClient(),
def test_update_1():
    """
    Test that existing entries are updated.

    A polymer is stored, and the database state is checked. The same
    polymer is then stored again with a zeroed position matrix. Only
    the position matrix entry of the polymer is expected to change;
    every entry is expected to be present exactly once.

    """

    database_name = '_test_update_1'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    jsonizer = stk.ConstructedMoleculeJsonizer()
    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    building_block = stk.BuildingBlock(
        smiles='BrCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    # Use it as a building block twice, to make sure it is
    # not repeatedly added to the molecules database.
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, building_block),
        repeating_unit='AB',
        num_repeating_units=2,
    )
    polymer = stk.ConstructedMolecule(
        topology_graph=topology_graph,
    ).with_canonical_atom_ordering()
    original_json = jsonizer.to_json(polymer)

    def expected_state(polymer_matrix):
        # Everything except the polymer's position matrix comes from
        # the JSON of the original polymer.
        building_block_json = original_json['buildingBlocks'][0]
        return DatabaseState({
            DatabaseEntry(**original_json['molecule']):
            1,
            DatabaseEntry(**to_hashable_matrix(polymer_matrix)):
            1,
            DatabaseEntry(**building_block_json['molecule']):
            1,
            DatabaseEntry(**to_hashable_matrix(
                json=building_block_json['matrix'],
            )):
            1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=original_json['constructedMolecule'],
            )):
            1,
        })

    database.put(polymer)
    assert_database_state(
        state1=get_database_state(database),
        state2=expected_state(original_json['matrix']),
    )

    zeroed_positions = np.zeros((polymer.get_num_atoms(), 3))
    moved_polymer = polymer.with_position_matrix(
        position_matrix=zeroed_positions,
    )
    moved_json = jsonizer.to_json(moved_polymer)

    database.put(moved_polymer)
    assert_database_state(
        state1=get_database_state(database),
        state2=expected_state(moved_json['matrix']),
    )
def test_update_3():
    """
    Test that existing entries are updated.

    In this test, you first create one entry with two keys. Then you
    update the entry through two further databases, each of which uses
    only one of those keys. No duplicate entries should be made in the
    database this way.

    """

    database_name = '_test_update_3'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    def make_database(jsonizer):
        # All databases share the client and the database name, and
        # have both LRU cache sizes set to 0.
        return stk.ConstructedMoleculeMongoDb(
            mongo_client=client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    both_keys_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    both_keys_db = make_database(both_keys_jsonizer)

    inchi_key_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    inchi_key_db = make_database(inchi_key_jsonizer)

    smiles_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    smiles_db = make_database(smiles_jsonizer)

    building_block = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    # Use it as a building block twice, to make sure it is
    # not repeatedly added to the molecules database.
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, building_block),
        repeating_unit='AB',
        num_repeating_units=2,
    )
    original = stk.ConstructedMolecule(
        topology_graph=topology_graph,
    ).with_canonical_atom_ordering()
    original_json = both_keys_jsonizer.to_json(original)

    def expected_state(polymer_matrix):
        # Only the polymer's position matrix entry varies between the
        # checks; the rest comes from the two-key JSON.
        building_block_json = original_json['buildingBlocks'][0]
        return DatabaseState({
            DatabaseEntry(**original_json['molecule']):
            1,
            DatabaseEntry(**to_hashable_matrix(polymer_matrix)):
            1,
            DatabaseEntry(**building_block_json['molecule']):
            1,
            DatabaseEntry(**to_hashable_matrix(
                json=building_block_json['matrix'],
            )):
            1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=original_json['constructedMolecule'],
            )):
            1,
        })

    def updated_matrix(jsonizer, polymer):
        # The stored entry keeps the keys of the two-key JSON, and
        # only its 'm' field is replaced.
        matrix = dict(original_json['matrix'])
        matrix['m'] = jsonizer.to_json(polymer)['matrix']['m']
        return matrix

    def with_zeroed_positions():
        return original.with_position_matrix(
            position_matrix=np.zeros((original.get_num_atoms(), 3)),
        )

    both_keys_db.put(original)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(original_json['matrix']),
    )

    # Should update the entry.
    inchi_key_polymer = with_zeroed_positions()
    inchi_key_matrix = updated_matrix(inchi_key_jsonizer, inchi_key_polymer)

    inchi_key_db.put(inchi_key_polymer)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(inchi_key_matrix),
    )

    # Should also update the entry.
    smiles_polymer = with_zeroed_positions()
    smiles_matrix = updated_matrix(smiles_jsonizer, smiles_polymer)

    smiles_db.put(smiles_polymer)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(smiles_matrix),
    )
def main():
    """
    Fill the ``stk`` database of the example cluster with molecules.

    The user is prompted for credentials, and the ``stk`` database is
    dropped. A macrocycle, a linear polymer, a rotaxane, a honeycomb
    COF and a cage are then constructed and passed through ``uff()``.
    For each one, the number of atoms, the number of bonds, the value
    of ``uff_energy()`` and the molecule itself are written to the
    database.

    """

    # Imported here so that the function is self-contained.
    import getpass
    import urllib.parse

    # NOTE(review): the credential handling was garbled in the source
    # (redacted text), so it is reconstructed here - confirm against
    # the intended script. PyMongo requires the username and password
    # in a URI to be percent-escaped, hence quote_plus().
    username = urllib.parse.quote_plus(input('Username: '))
    password = urllib.parse.quote_plus(getpass.getpass('Password: '))
    client = pymongo.MongoClient(
        f'mongodb+srv://{username}:{password}@stk-vis-example.x4bkl.'
        'mongodb.net/stk?retryWrites=true&w=majority')
    database = 'stk'
    client.drop_database(database)

    constructed_db = stk.ConstructedMoleculeMongoDb(client, database)
    atoms_db = stk.ValueMongoDb(client, 'Num Atoms')
    bonds_db = stk.ValueMongoDb(client, 'Num Bonds')
    energy_db = stk.ValueMongoDb(client, 'UFF Energy')

    def store(molecule):
        # Write the values first and the molecule itself last.
        atoms_db.put(molecule, molecule.get_num_atoms())
        bonds_db.put(molecule, molecule.get_num_bonds())
        energy_db.put(molecule, uff_energy(molecule))
        constructed_db.put(molecule)

    macrocycle = uff(
        stk.ConstructedMolecule(topology_graph=stk.macrocycle.Macrocycle(
            building_blocks=(
                stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrNNBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrOOBr',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            repeating_unit='ABC',
            num_repeating_units=2,
        ), ))
    store(macrocycle)

    polymer = uff(
        stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
            building_blocks=(
                stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrNNBr',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            repeating_unit='AB',
            num_repeating_units=4,
        ), ))
    store(polymer)

    # The polymer built above serves as the axle of the rotaxane.
    rotaxane = uff(
        stk.ConstructedMolecule(topology_graph=stk.rotaxane.NRotaxane(
            axle=stk.BuildingBlock.init_from_molecule(polymer),
            cycles=(stk.BuildingBlock(
                smiles=('C1=CC2=CC3=CC=C(N3)C=C4C=CC(=N4)'
                        'C=C5C=CC(=N5)C=C1N2'), ), ),
            repeating_unit='A',
            num_repeating_units=1,
        ), ))
    store(rotaxane)

    honeycomb = uff(
        stk.ConstructedMolecule(topology_graph=stk.cof.Honeycomb(
            building_blocks=(
                stk.BuildingBlock('BrC=CBr', [stk.BromoFactory()]),
                stk.BuildingBlock(
                    smiles='Brc1cc(Br)cc(Br)c1',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            lattice_size=(2, 2, 1)), ))
    store(honeycomb)

    cc3 = uff(
        stk.ConstructedMolecule(topology_graph=stk.cage.FourPlusSix(
            building_blocks=(
                stk.BuildingBlock(
                    smiles='NC1CCCCC1N',
                    functional_groups=[stk.PrimaryAminoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='O=Cc1cc(C=O)cc(C=O)c1',
                    functional_groups=[stk.AldehydeFactory()],
                ),
            ), ), ))
    store(cc3)
def main():
    """
    Run the evolutionary algorithm and write its results.

    The MongoDB URI is read from the ``--mongodb_uri`` command line
    option. The initial and final populations are written to
    ``initial_{i}.mol`` and ``final_{i}.mol`` files, every molecule of
    every generation is put into the constructed molecule database,
    and progress plots are written once all 50 generations have been
    produced.

    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--mongodb_uri',
                        help='The MongoDB URI for the database to connect to.',
                        default='mongodb://localhost:27017/')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results.
    # NOTE: every seed below is drawn from this one generator, so the
    # order of the generator.randint() calls determines which seed
    # each component receives. Reordering them changes the run.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    # No database name is passed to either database here.
    client = pymongo.MongoClient(args.mongodb_uri)
    db = stk.ConstructedMoleculeMongoDb(client)
    fitness_db = stk.ValueMongoDb(client, 'fitness_values')

    # Plot selections.
    # NOTE(review): the SelectionPlotter instances are not kept;
    # presumably constructing one is enough to hook it up to the
    # selector - confirm against the stk documentation.
    generation_selector = stk.Best(
        num_batches=25,
        duplicate_molecules=False,
    )
    stk.SelectionPlotter('generation_selection', generation_selector)

    mutation_selector = stk.Roulette(
        num_batches=5,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('mutation_selection', mutation_selector)

    crossover_selector = stk.Roulette(
        num_batches=3,
        batch_size=2,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('crossover_selection', crossover_selector)

    # The same value database is used as both the input and the
    # output database of the fitness calculator.
    fitness_calculator = stk.PropertyVector(
        property_functions=(
            get_num_rotatable_bonds,
            get_complexity,
            get_num_bad_rings,
        ),
        input_database=fitness_db,
        output_database=fitness_db,
    )

    fitness_normalizer = stk.NormalizerSequence(
        fitness_normalizers=(
            # Prevent division by 0 error in DivideByMean, by ensuring
            # a value of each property to be at least 1.
            stk.Add((1, 1, 1)),
            stk.DivideByMean(),
            # Obviously, because all coefficients are equal, the
            # Multiply normalizer does not need to be here. However,
            # it's here to show that you can easily change the relative
            # importance of each component of the fitness value, by
            # changing the values of the coefficients.
            stk.Multiply((1, 1, 1)),
            stk.Sum(),
            stk.Power(-1),
        ), )

    ea = stk.EvolutionaryAlgorithm(
        num_processes=1,
        initial_population=initial_population,
        fitness_calculator=fitness_calculator,
        mutator=stk.RandomMutator(
            mutators=(
                stk.RandomBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.RandomBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
            ),
            random_seed=generator.randint(0, 1000),
        ),
        crosser=stk.GeneticRecombination(get_gene=get_functional_group_type, ),
        generation_selector=generation_selector,
        mutation_selector=mutation_selector,
        crossover_selector=crossover_selector,
        fitness_normalizer=fitness_normalizer,
    )

    logger.info('Starting EA.')

    # Store every molecule of every generation in the database, and
    # keep the generations for plotting.
    generations = []
    for generation in ea.get_generations(50):
        for record in generation.get_molecule_records():
            db.put(record.get_molecule())
        generations.append(generation)

    # Write the final population.
    # `generation` is the last value bound by the loop above.
    for i, record in enumerate(generation.get_molecule_records()):
        write(record.get_molecule(), f'final_{i}.mol')

    logger.info('Making fitness plot.')

    # Normalize the fitness values across the entire EA before
    # plotting the fitness values.
    generations = tuple(
        normalize_generations(
            fitness_calculator=fitness_calculator,
            fitness_normalizer=fitness_normalizer,
            generations=generations,
        ))

    fitness_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: record.get_fitness_value(),
        y_label='Fitness Value',
    )
    fitness_progress.write('fitness_progress.png')
    fitness_progress.get_plot_data().to_csv('fitness_progress.csv')

    logger.info('Making rotatable bonds plot.')

    # get_property receives a record, so the molecule is extracted
    # from it before the rotatable bonds are counted.
    rotatable_bonds_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: get_num_rotatable_bonds(record.
                                                            get_molecule()),
        y_label='Number of Rotatable Bonds',
    )
    rotatable_bonds_progress.write('rotatable_bonds_progress.png')
示例#12
0
def main():
    """
    Run the evolutionary algorithm and write its results.

    The MongoDB URI is read from the ``--mongodb_uri`` command line
    option. The initial and final populations are written to
    ``initial_{i}.mol`` and ``final_{i}.mol`` files, every molecule of
    every generation is put into the constructed molecule database,
    and two progress plots are written once all 50 generations have
    been produced.

    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mongodb_uri',
        help='The MongoDB URI for the database to connect to.',
        default='mongodb://localhost:27017/',
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results.
    # NOTE: every seed below is drawn from this one generator, in
    # argument-evaluation order, so reordering the keyword arguments
    # of the EvolutionaryAlgorithm call changes which seed each
    # component receives.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    # No database name is passed to the molecule database here.
    client = pymongo.MongoClient(args.mongodb_uri)
    db = stk.ConstructedMoleculeMongoDb(client)
    ea = stk.EvolutionaryAlgorithm(
        initial_population=initial_population,
        fitness_calculator=stk.FitnessFunction(get_fitness_value),
        mutator=stk.RandomMutator(
            mutators=(
                stk.RandomBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.RandomBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
            ),
            random_seed=generator.randint(0, 1000),
        ),
        crosser=stk.GeneticRecombination(get_gene=get_functional_group_type, ),
        generation_selector=stk.Best(
            num_batches=25,
            duplicate_molecules=False,
        ),
        mutation_selector=stk.Roulette(
            num_batches=5,
            random_seed=generator.randint(0, 1000),
        ),
        crossover_selector=stk.Roulette(
            num_batches=3,
            batch_size=2,
            random_seed=generator.randint(0, 1000),
        ),
    )

    logger.info('Starting EA.')

    # Store every molecule of every generation in the database, and
    # keep the generations for plotting.
    generations = []
    for generation in ea.get_generations(50):
        for record in generation.get_molecule_records():
            db.put(record.get_molecule())
        generations.append(generation)

    # Write the final population.
    # `generation` is the last value bound by the loop above.
    for i, record in enumerate(generation.get_molecule_records()):
        write(record.get_molecule(), f'final_{i}.mol')

    logger.info('Making fitness plot.')

    fitness_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: record.get_fitness_value(),
        y_label='Fitness Value',
    )
    fitness_progress.write('fitness_progress.png')

    logger.info('Making rotatable bonds plot.')

    # NOTE(review): get_num_rotatable_bonds is passed directly as
    # get_property, while the fitness plot above passes a function
    # taking a record - presumably get_num_rotatable_bonds accepts a
    # record here; verify against its definition.
    rotatable_bonds_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=get_num_rotatable_bonds,
        y_label='Number of Rotatable Bonds',
    )
    rotatable_bonds_progress.write('rotatable_bonds_progress.png')
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    In this test, you first create two separate entries, using
    different molecule keys. You then update both at the same time,
    with a database which uses both molecule keys.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    jsonizer1 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ), )
    database1 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer1,
    )

    jsonizer2 = stk.ConstructedMoleculeJsonizer(key_makers=(stk.Smiles(), ), )
    database2 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer2,
    )

    jsonizer3 = stk.ConstructedMoleculeJsonizer(key_makers=(
        stk.InchiKey(),
        stk.Smiles(),
    ), )
    database3 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer3,
    )

    def get_entries(molecule_json, count):
        # Map each of the five database entries described by
        # molecule_json to the number of times it is expected.
        building_block_json = molecule_json['buildingBlocks'][0]
        return {
            DatabaseEntry(**molecule_json['molecule']):
            count,
            DatabaseEntry(**to_hashable_matrix(molecule_json['matrix'])):
            count,
            DatabaseEntry(**building_block_json['molecule']):
            count,
            DatabaseEntry(**to_hashable_matrix(
                json=building_block_json['matrix'],
            )):
            count,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=molecule_json['constructedMolecule'],
            )):
            count,
        }

    molecule = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    polymer1 = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            # Use it as a building block twice, to make sure it is
            # not repeatedly added to the molecules database.
            building_blocks=(molecule, molecule),
            repeating_unit='AB',
            num_repeating_units=2,
        ), ).with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(polymer1)

    database1.put(polymer1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState(get_entries(json1, 1)),
    )

    # Should add another entry, as a different key maker is used.
    polymer2 = polymer1.with_position_matrix(position_matrix=np.zeros(
        (polymer1.get_num_atoms(), 3)), )
    json2 = jsonizer2.to_json(polymer2)

    database2.put(polymer2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            **get_entries(json1, 1),
            **get_entries(json2, 1),
        }),
    )

    # Should update both entries.
    polymer3 = polymer1.with_position_matrix(position_matrix=np.zeros(
        (polymer1.get_num_atoms(), 3)), )
    json3 = jsonizer3.to_json(polymer3)

    database3.put(polymer3)
    # Both previously separate entries now carry the same content, so
    # each distinct entry is expected twice. The expected state used
    # to list these five keys twice in one dict literal, where the
    # repeated keys were silently collapsed; list them once.
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState(get_entries(json3, 2)),
    )
示例#14
0
def add_mixed_entries(
    client,
    database,
    key_makers,
):
    """
    Add molecules with differing sets of stored data to a database.

    Two cages and a macrocycle are stored together with their atom
    counts. A polymer is stored without any value. A rotaxane, built
    from the polymer and the macrocycle, is stored together with its
    bond count.

    Parameters
    ----------
    client
        Passed as the ``mongo_client`` of every database.

    database
        Passed as the ``database`` of every database.

    key_makers
        Passed to the constructed molecule jsonizer and to both value
        databases.

    """

    def make_value_db(collection):
        return stk.ValueMongoDb(
            mongo_client=client,
            collection=collection,
            database=database,
            key_makers=key_makers,
        )

    def bromo_block(smiles):
        # A new building block is created on every call.
        return stk.BuildingBlock(
            smiles=smiles,
            functional_groups=[stk.BromoFactory()],
        )

    def cage_blocks():
        return (
            bromo_block('BrC1C(Br)CCCC1'),
            bromo_block('Brc1cc(Br)cc(Br)c1'),
        )

    molecule_db = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=stk.ConstructedMoleculeJsonizer(key_makers=key_makers),
    )
    atom_count_db = make_value_db('numAtoms')
    bond_count_db = make_value_db('numBonds')

    small_cage = stk.ConstructedMolecule(
        topology_graph=stk.cage.FourPlusSix(
            building_blocks=cage_blocks(),
        ),
    )
    molecule_db.put(small_cage)
    atom_count_db.put(small_cage, small_cage.get_num_atoms())

    large_cage = stk.ConstructedMolecule(
        topology_graph=stk.cage.TwentyPlusThirty(
            building_blocks=cage_blocks(),
        ),
    )
    molecule_db.put(large_cage)
    atom_count_db.put(large_cage, large_cage.get_num_atoms())

    macrocycle = stk.ConstructedMolecule(
        topology_graph=stk.macrocycle.Macrocycle(
            building_blocks=(
                bromo_block('BrCCBr'),
                bromo_block('BrNNBr'),
                bromo_block('BrOOBr'),
            ),
            repeating_unit='ABC',
            num_repeating_units=2,
        ),
    )
    atom_count_db.put(macrocycle, macrocycle.get_num_atoms())

    polymer = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            building_blocks=(
                bromo_block('BrCCBr'),
                bromo_block('BrNNBr'),
            ),
            repeating_unit='AB',
            num_repeating_units=4,
        ),
    )
    # The polymer is the axle and the macrocycle the only cycle.
    axle = stk.BuildingBlock.init_from_molecule(polymer)
    cycle = stk.BuildingBlock.init_from_molecule(macrocycle)
    rotaxane = stk.ConstructedMolecule(
        topology_graph=stk.rotaxane.NRotaxane(
            axle=axle,
            cycles=(cycle, ),
            repeating_unit='A',
            num_repeating_units=1,
        ),
    )
    for stored in (polymer, macrocycle, rotaxane):
        molecule_db.put(stored)
    bond_count_db.put(rotaxane, rotaxane.get_num_bonds())