Пример #1
0
def add_entries(client, database, key_makers, random_seed):
    """
    Store molecules together with their atom and bond counts.

    Every molecule yielded by ``get_molecules(200, 5)`` is put into
    the molecule database and its number of bonds is put into the
    ``numBonds`` collection. The number of atoms is put into the
    ``numAtoms`` collection for every other molecule only, starting
    with the first one.

    Parameters
    ----------
    client
        Passed as ``mongo_client`` to every database object.

    database
        Passed as ``database`` to every database object.

    key_makers
        Passed to the molecule jsonizer and to both value databases.

    random_seed
        Not used by this function.

    """

    # Keyword arguments shared by both value databases.
    value_db_options = {
        'mongo_client': client,
        'database': database,
        'key_makers': key_makers,
    }

    molecule_store = stk.MoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=stk.MoleculeJsonizer(key_makers=key_makers),
    )
    atom_counts = stk.ValueMongoDb(
        collection='numAtoms',
        **value_db_options,
    )
    bond_counts = stk.ValueMongoDb(
        collection='numBonds',
        **value_db_options,
    )

    for position, molecule in enumerate(get_molecules(200, 5)):
        molecule_store.put(molecule)
        bond_counts.put(molecule, molecule.get_num_bonds())
        # Only molecules at even positions (0, 2, 4, ...) get an
        # atom count, so half of the entries lack one.
        if position % 2 == 0:
            atom_counts.put(molecule, molecule.get_num_atoms())
Пример #2
0
def add_constructed_molecules(
    client,
    database,
    key_makers,
):
    """
    Store linear polymers and atom counts in the database.

    Two ``get_molecules(200, 5)`` streams are walked in lockstep.
    Each pair of building blocks is joined into a linear ``'AB'``
    polymer with one repeating unit. The polymer is put into the
    constructed molecule database, and the number of atoms of both
    the polymer and the first building block is put into the
    ``numAtoms`` collection.

    Parameters
    ----------
    client
        Passed as ``mongo_client`` to every database object.

    database
        Passed as ``database`` to every database object.

    key_makers
        Passed to the jsonizer and to the value database.

    """

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=key_makers)
    polymer_store = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=jsonizer,
    )
    atom_counts = stk.ValueMongoDb(
        mongo_client=client,
        collection='numAtoms',
        database=database,
        key_makers=key_makers,
    )

    first_blocks = get_molecules(200, 5)
    second_blocks = get_molecules(200, 5)
    for bb1, bb2 in zip(first_blocks, second_blocks):
        topology_graph = stk.polymer.Linear(
            building_blocks=(bb1, bb2),
            repeating_unit='AB',
            num_repeating_units=1,
        )
        polymer = stk.ConstructedMolecule(topology_graph=topology_graph)

        polymer_store.put(polymer)
        atom_counts.put(polymer, polymer.get_num_atoms())
        # Only the first building block of the pair gets a value.
        atom_counts.put(bb1, bb1.get_num_atoms())
Пример #3
0
def name_db(mongo_client):
    """
    Create a :class:`.ValueDatabase` which holds molecule names.

    The database uses the ``name`` collection of
    ``_stk_pytest_database``, keys molecules with :class:`.Smiles`
    and passes the SMILES key name as its only index.

    Parameters
    ----------
    mongo_client
        Passed as ``mongo_client`` to the database.

    """

    key_makers = (stk.Smiles(), )
    indices = (stk.Smiles().get_key_name(), )
    return stk.ValueMongoDb(
        mongo_client=mongo_client,
        database='_stk_pytest_database',
        collection='name',
        key_makers=key_makers,
        indices=indices,
    )
Пример #4
0
def test_get_caching(mongo_client):
    """
    Test that a repeated ``get`` is answered from the cache.

    After one ``put`` and two identical ``get`` calls, the cache
    statistics of ``database._get`` must show one miss (first call)
    and one hit (second call).

    """

    # The collection and the database share the same name.
    name = '_test_get_caching'
    mongo_client.drop_database(name)

    database = stk.ValueMongoDb(
        mongo_client=mongo_client,
        collection=name,
        database=name,
    )
    molecule = stk.BuildingBlock('CCC')
    database.put(molecule, 43)
    for _ in range(2):
        database.get(molecule)

    stats = database._get.cache_info()
    assert stats.hits == 1
    assert stats.misses == 1
Пример #5
0
def test_update_1():
    """
    Test that existing entries are updated.

    The same molecule is put into the database twice, with a
    different value each time. After every put, the database must
    hold exactly one entry for the molecule, and that entry must
    carry the most recently written value.

    """

    collection = '_test_update_1'
    database_name = '_test_update_1'

    # Use the client as a context manager, so that its connections
    # are released even if one of the assertions below fails.
    with pymongo.MongoClient() as client:
        client.drop_database(database_name)

        database = stk.ValueMongoDb(
            mongo_client=client,
            collection=collection,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
        )

        molecule = stk.BuildingBlock('CCC')

        # The second put must overwrite the first entry rather than
        # add a new one, so the expected count stays at 1.
        for value in (12, 43):
            database.put(molecule, value)
            assert_database_state(
                state1=get_database_state(database),
                state2=DatabaseState({
                    DatabaseEntry(
                        InChIKey=stk.InchiKey().get_key(molecule),
                        v=value,
                    ): 1,
                }),
            )
Пример #6
0
def test_put_caching():
    """
    Test that a repeated ``put`` is answered from the cache.

    Putting the same molecule and value twice must give one cache
    miss followed by one cache hit on ``database._put``. Putting the
    same molecule with a different value must register as a second
    miss and leave the number of hits unchanged.

    """

    collection = '_test_put_caching'
    database_name = '_test_put_caching'

    # Use the client as a context manager, so that its connections
    # are released even if one of the assertions below fails.
    with pymongo.MongoClient() as client:
        client.drop_database(database_name)

        database = stk.ValueMongoDb(
            mongo_client=client,
            collection=collection,
            database=database_name,
        )
        molecule = stk.BuildingBlock('CCC')
        database.put(molecule, 43)
        database.put(molecule, 43)

        cache_info = database._put.cache_info()
        assert cache_info.hits == 1
        assert cache_info.misses == 1

        # A new value for the same molecule is a different cache key.
        database.put(molecule, 40)
        cache_info = database._put.cache_info()
        assert cache_info.hits == 1
        assert cache_info.misses == 2
Пример #7
0
def _get_case_data(mongo_client):
    """
    Build the :class:`.CaseData` instance for the caching test.

    Parameters
    ----------
    mongo_client : :class:`pymongo.MongoClient`
        The mongo client the database should connect to.

    Returns
    -------
    :class:`.CaseData`
        Holds the fitness calculator, the molecule and the fitness
        value obtained from the first calculation on that molecule.

    """

    # _counter.get_count is used as the property function, and it is
    # expected to hand back a different number on each call. The
    # fitness value recorded here comes from the first calculation.
    # When the test later calls get_fitness_value() again, a working
    # cache returns this same number, while a broken cache triggers
    # a new _counter.get_count call and therefore a different number.

    value_db = stk.ValueMongoDb(
        mongo_client=mongo_client,
        collection='test_caching',
        database='_stk_pytest_database',
    )

    # The same database is both read from and written to.
    calculator = stk.PropertyVector(
        property_functions=(_counter.get_count, ),
        input_database=value_db,
        output_database=value_db,
    )
    building_block = stk.BuildingBlock('BrCCBr')

    return CaseData(
        fitness_calculator=calculator,
        molecule=building_block,
        fitness_value=calculator.get_fitness_value(building_block),
    )
Пример #8
0
import pytest
import stk
import pymongo

from ..case_data import CaseData


@pytest.fixture(
    params=(
        CaseData(
            database=stk.ValueMongoDb(
                mongo_client=pymongo.MongoClient(),
                collection='values',
                database='_stk_test_database_for_testing',
                put_lru_cache_size=0,
                get_lru_cache_size=0,
            ),
            molecule=stk.BuildingBlock('BrCCBr'),
            value=12,
        ),
        CaseData(
            database=stk.ValueMongoDb(
                mongo_client=pymongo.MongoClient(),
                collection='values',
                database='_stk_test_database_for_testing',
                put_lru_cache_size=128,
                get_lru_cache_size=128,
            ),
            molecule=stk.BuildingBlock('BrCCBr'),
            value=12,
        ),
Пример #9
0
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    Two separate entries are created first, each through a database
    which uses a different molecule key. Both entries are then
    updated in one go, through a database which uses both molecule
    keys.

    """

    # The collection and the database share the same name.
    name = '_test_update_2'
    mongo_client.drop_database(name)

    def make_database(*key_makers):
        # All databases in this test differ only in their key makers.
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=name,
            database=name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    inchi_key_db = make_database(stk.InchiKey())
    smiles_db = make_database(stk.Smiles())
    both_keys_db = make_database(stk.InchiKey(), stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    def check_state(expected_entries):
        # The state is always read through the InChIKey database.
        assert_database_state(
            state1=get_database_state(inchi_key_db),
            state2=DatabaseState(expected_entries),
        )

    inchi_key_db.put(molecule, 12)
    check_state({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
    })

    # A different key maker is used, so a second entry should appear.
    smiles_db.put(molecule, 32)
    check_state({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
        DatabaseEntry(SMILES=smiles, v=32): 1,
    })

    # Both key makers are used, so both entries should be updated.
    both_keys_db.put(molecule, 56)
    check_state({
        DatabaseEntry(InChIKey=inchi_key, SMILES=smiles, v=56): 2,
    })
Пример #10
0
def test_update_3(mongo_client):
    """
    Test that existing entries are updated.

    One entry holding two keys is created first. It is then updated
    through two further databases, each of which uses only one of
    the two keys. Updating this way should not create any duplicate
    entries in the database.

    """

    # The collection and the database share the same name.
    name = '_test_update_3'
    mongo_client.drop_database(name)

    def make_database(*key_makers):
        # All databases in this test differ only in their key makers.
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=name,
            database=name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    both_keys_db = make_database(stk.InchiKey(), stk.Smiles())
    inchi_key_db = make_database(stk.InchiKey())
    smiles_db = make_database(stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    def check_single_entry(value):
        # Exactly one entry, holding both keys and the given value.
        assert_database_state(
            state1=get_database_state(both_keys_db),
            state2=DatabaseState({
                DatabaseEntry(
                    InChIKey=inchi_key,
                    SMILES=smiles,
                    v=value,
                ): 1,
            }),
        )

    both_keys_db.put(molecule, 12)
    check_single_entry(12)

    # Putting through the InChIKey alone should update the entry.
    inchi_key_db.put(molecule, 32)
    check_single_entry(32)

    # Putting through the SMILES alone should update it as well.
    smiles_db.put(molecule, 62)
    check_single_entry(62)
Пример #11
0
import pytest
import stk

from ..case_data import CaseData
from ...utilities import MockMongoClient


# One test case per LRU cache size. The cases are otherwise the same.
@pytest.fixture(
    params=tuple(
        CaseData(
            database=stk.ValueMongoDb(
                mongo_client=MockMongoClient(),
                collection='values',
                lru_cache_size=lru_cache_size,
            ),
            molecule=stk.BuildingBlock('BrCCBr'),
            value=12,
        )
        for lru_cache_size in (0, 128)
    ),
)
def mongo_db(request):
    """
    Provide one :class:`.CaseData` instance per fixture parameter.

    Parameters
    ----------
    request
        The pytest request object, whose ``param`` attribute holds
        the :class:`.CaseData` instance of the current run.

    """

    return request.param
Пример #12
0
        The value to put into the database.

    """

    get_database: abc.Callable[[pymongo.MongoClient], stk.ValueMongoDb]
    molecule: stk.Molecule
    value: object


@pytest.fixture(
    params=(
        lambda: CaseDataData(
            get_database=lambda mongo_client: stk.ValueMongoDb(
                mongo_client=mongo_client,
                collection='values',
                database='_stk_test_database_for_testing',
                put_lru_cache_size=0,
                get_lru_cache_size=0,
            ),
            molecule=stk.BuildingBlock('BrCCBr'),
            value=12,
        ),
        lambda: CaseDataData(
            get_database=lambda mongo_client: stk.ValueMongoDb(
                mongo_client=mongo_client,
                collection='values',
                database='_stk_test_database_for_testing',
                put_lru_cache_size=128,
                get_lru_cache_size=128,
            ),
            molecule=stk.BuildingBlock('BrCCBr'),
Пример #13
0
def main():
    # Script entry point: drops the 'stk' database, then builds five
    # constructed molecules (macrocycle, linear polymer, rotaxane,
    # COF and cage) and stores each of them together with its number
    # of atoms, number of bonds and the result of `uff_energy`.
    #
    # NOTE(review): the statement below looks corrupted (possibly
    # redacted credentials). In the visible code `client` is never
    # assigned and `password` is never defined - restore this from
    # the original source before running.
    username = input('Username: '******'mongodb+srv://{username}:{password}@stk-vis-example.x4bkl.'
        'mongodb.net/stk?retryWrites=true&w=majority')
    database = 'stk'
    # Start from an empty database on every run.
    client.drop_database(database)

    # One database for the molecules themselves, plus one value
    # database per stored property.
    # NOTE(review): the second positional argument is presumably the
    # collection name - confirm against the stk.ValueMongoDb signature.
    constructed_db = stk.ConstructedMoleculeMongoDb(client, database)
    atoms_db = stk.ValueMongoDb(client, 'Num Atoms')
    bonds_db = stk.ValueMongoDb(client, 'Num Bonds')
    energy_db = stk.ValueMongoDb(client, 'UFF Energy')

    # `uff` and `uff_energy` are defined outside this function.
    # NOTE(review): presumably a UFF optimization and a UFF energy
    # calculation - confirm against their definitions.
    macrocycle = uff(
        stk.ConstructedMolecule(topology_graph=stk.macrocycle.Macrocycle(
            building_blocks=(
                stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrNNBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrOOBr',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            repeating_unit='ABC',
            num_repeating_units=2,
        ), ))
    atoms_db.put(macrocycle, macrocycle.get_num_atoms())
    bonds_db.put(macrocycle, macrocycle.get_num_bonds())
    energy_db.put(macrocycle, uff_energy(macrocycle))
    constructed_db.put(macrocycle)

    polymer = uff(
        stk.ConstructedMolecule(topology_graph=stk.polymer.Linear(
            building_blocks=(
                stk.BuildingBlock(
                    smiles='BrCCBr',
                    functional_groups=[stk.BromoFactory()],
                ),
                stk.BuildingBlock(
                    smiles='BrNNBr',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            repeating_unit='AB',
            num_repeating_units=4,
        ), ))
    atoms_db.put(polymer, polymer.get_num_atoms())
    bonds_db.put(polymer, polymer.get_num_bonds())
    energy_db.put(polymer, uff_energy(polymer))
    constructed_db.put(polymer)

    # The rotaxane reuses the polymer built above as its axle.
    rotaxane = uff(
        stk.ConstructedMolecule(topology_graph=stk.rotaxane.NRotaxane(
            axle=stk.BuildingBlock.init_from_molecule(polymer),
            cycles=(stk.BuildingBlock(
                smiles=('C1=CC2=CC3=CC=C(N3)C=C4C=CC(=N4)'
                        'C=C5C=CC(=N5)C=C1N2'), ), ),
            repeating_unit='A',
            num_repeating_units=1,
        ), ))
    atoms_db.put(rotaxane, rotaxane.get_num_atoms())
    bonds_db.put(rotaxane, rotaxane.get_num_bonds())
    energy_db.put(rotaxane, uff_energy(rotaxane))
    constructed_db.put(rotaxane)

    # NOTE(review): the variable is called `kagome`, but the topology
    # graph used is stk.cof.Honeycomb - confirm which one is intended.
    kagome = uff(
        stk.ConstructedMolecule(topology_graph=stk.cof.Honeycomb(
            building_blocks=(
                stk.BuildingBlock('BrC=CBr', [stk.BromoFactory()]),
                stk.BuildingBlock(
                    smiles='Brc1cc(Br)cc(Br)c1',
                    functional_groups=[stk.BromoFactory()],
                ),
            ),
            lattice_size=(2, 2, 1)), ))
    atoms_db.put(kagome, kagome.get_num_atoms())
    bonds_db.put(kagome, kagome.get_num_bonds())
    energy_db.put(kagome, uff_energy(kagome))
    constructed_db.put(kagome)

    # Unlike the molecules above, the cage is constructed first and
    # passed through `uff` in a separate statement.
    cc3 = stk.ConstructedMolecule(topology_graph=stk.cage.FourPlusSix(
        building_blocks=(
            stk.BuildingBlock(
                smiles='NC1CCCCC1N',
                functional_groups=[stk.PrimaryAminoFactory()],
            ),
            stk.BuildingBlock(
                smiles='O=Cc1cc(C=O)cc(C=O)c1',
                functional_groups=[stk.AldehydeFactory()],
            ),
        ), ), )
    cc3 = uff(cc3)
    atoms_db.put(cc3, cc3.get_num_atoms())
    bonds_db.put(cc3, cc3.get_num_bonds())
    energy_db.put(cc3, uff_energy(cc3))
    constructed_db.put(cc3)
def main():
    """
    Run the evolutionary algorithm and write its results.

    The function reads the ``--mongodb_uri`` command line argument,
    loads the building blocks listed in ``fluoros.txt`` and
    ``bromos.txt`` next to this file, writes the initial population
    to ``initial_{i}.mol`` files, and runs the evolutionary algorithm
    for 50 generations, putting every molecule of every generation
    into the constructed molecule database. Afterwards, the last
    generation is written to ``final_{i}.mol`` files and the fitness
    and rotatable-bond progress plots are written.

    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--mongodb_uri',
                        help='The MongoDB URI for the database to connect to.',
                        default='mongodb://localhost:27017/')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results.
    # Every component below draws its own seed from this generator,
    # so the order of the generator.randint() calls determines which
    # seed each component receives - do not reorder them.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    # `db` stores the molecules; `fitness_db` is used by the fitness
    # calculator below as both its input and its output database.
    client = pymongo.MongoClient(args.mongodb_uri)
    db = stk.ConstructedMoleculeMongoDb(client)
    fitness_db = stk.ValueMongoDb(client, 'fitness_values')

    # Plot selections.
    # NOTE(review): the SelectionPlotter instances are not kept;
    # presumably they attach themselves to the selector they are
    # given - confirm against the stk documentation.
    generation_selector = stk.Best(
        num_batches=25,
        duplicate_molecules=False,
    )
    stk.SelectionPlotter('generation_selection', generation_selector)

    mutation_selector = stk.Roulette(
        num_batches=5,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('mutation_selection', mutation_selector)

    crossover_selector = stk.Roulette(
        num_batches=3,
        batch_size=2,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('crossover_selection', crossover_selector)

    fitness_calculator = stk.PropertyVector(
        property_functions=(
            get_num_rotatable_bonds,
            get_complexity,
            get_num_bad_rings,
        ),
        input_database=fitness_db,
        output_database=fitness_db,
    )

    fitness_normalizer = stk.NormalizerSequence(
        fitness_normalizers=(
            # Prevent division by 0 error in DivideByMean, by ensuring
            # a value of each property to be at least 1.
            stk.Add((1, 1, 1)),
            stk.DivideByMean(),
            # Obviously, because all coefficients are equal, the
            # Multiply normalizer does not need to be here. However,
            # it's here to show that you can easily change the relative
            # importance of each component of the fitness value, by
            # changing the values of the coefficients.
            stk.Multiply((1, 1, 1)),
            stk.Sum(),
            stk.Power(-1),
        ), )

    ea = stk.EvolutionaryAlgorithm(
        num_processes=1,
        initial_population=initial_population,
        fitness_calculator=fitness_calculator,
        mutator=stk.RandomMutator(
            mutators=(
                stk.RandomBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.RandomBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
            ),
            random_seed=generator.randint(0, 1000),
        ),
        crosser=stk.GeneticRecombination(get_gene=get_functional_group_type, ),
        generation_selector=generation_selector,
        mutation_selector=mutation_selector,
        crossover_selector=crossover_selector,
        fitness_normalizer=fitness_normalizer,
    )

    logger.info('Starting EA.')

    # Store every molecule of every generation in the database and
    # keep the generations themselves for the plots below.
    generations = []
    for generation in ea.get_generations(50):
        for record in generation.get_molecule_records():
            db.put(record.get_molecule())
        generations.append(generation)

    # Write the final population.
    # `generation` still refers to the last generation of the loop
    # above.
    for i, record in enumerate(generation.get_molecule_records()):
        write(record.get_molecule(), f'final_{i}.mol')

    logger.info('Making fitness plot.')

    # Normalize the fitness values across the entire EA before
    # plotting the fitness values.
    generations = tuple(
        normalize_generations(
            fitness_calculator=fitness_calculator,
            fitness_normalizer=fitness_normalizer,
            generations=generations,
        ))

    # The plot goes to a PNG file and its underlying data to a CSV
    # file.
    fitness_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: record.get_fitness_value(),
        y_label='Fitness Value',
    )
    fitness_progress.write('fitness_progress.png')
    fitness_progress.get_plot_data().to_csv('fitness_progress.csv')

    logger.info('Making rotatable bonds plot.')

    # The plotted property is recomputed from each record's molecule
    # with get_num_rotatable_bonds.
    rotatable_bonds_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: get_num_rotatable_bonds(record.
                                                            get_molecule()),
        y_label='Number of Rotatable Bonds',
    )
    rotatable_bonds_progress.write('rotatable_bonds_progress.png')
Пример #15
0
def add_mixed_entries(
    client,
    database,
    key_makers,
):
    """
    Store a mix of constructed molecules with partial value data.

    Two cages, a macrocycle, a linear polymer and a rotaxane are
    built. All five are put into the constructed molecule database.
    The number of atoms is stored for the two cages and the
    macrocycle only, and the number of bonds is stored for the
    rotaxane only.

    Parameters
    ----------
    client
        Passed as ``mongo_client`` to every database object.

    database
        Passed as ``database`` to every database object.

    key_makers
        Passed to the jsonizer and to both value databases.

    """

    def bromo_block(smiles):
        # A new building block whose functional groups come from a
        # new BromoFactory.
        return stk.BuildingBlock(
            smiles=smiles,
            functional_groups=[stk.BromoFactory()],
        )

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=key_makers)
    molecule_store = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=jsonizer,
    )
    atom_counts = stk.ValueMongoDb(
        mongo_client=client,
        collection='numAtoms',
        database=database,
        key_makers=key_makers,
    )
    bond_counts = stk.ValueMongoDb(
        mongo_client=client,
        collection='numBonds',
        database=database,
        key_makers=key_makers,
    )

    # Both cages use the same pair of building blocks and differ
    # only in their topology graph.
    for cage_topology in (
        stk.cage.FourPlusSix,
        stk.cage.TwentyPlusThirty,
    ):
        cage = stk.ConstructedMolecule(
            topology_graph=cage_topology(
                building_blocks=(
                    bromo_block('BrC1C(Br)CCCC1'),
                    bromo_block('Brc1cc(Br)cc(Br)c1'),
                ),
            ),
        )
        molecule_store.put(cage)
        atom_counts.put(cage, cage.get_num_atoms())

    macrocycle = stk.ConstructedMolecule(
        topology_graph=stk.macrocycle.Macrocycle(
            building_blocks=(
                bromo_block('BrCCBr'),
                bromo_block('BrNNBr'),
                bromo_block('BrOOBr'),
            ),
            repeating_unit='ABC',
            num_repeating_units=2,
        ),
    )
    atom_counts.put(macrocycle, macrocycle.get_num_atoms())

    polymer = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            building_blocks=(
                bromo_block('BrCCBr'),
                bromo_block('BrNNBr'),
            ),
            repeating_unit='AB',
            num_repeating_units=4,
        ),
    )

    # The rotaxane threads the polymer through the macrocycle.
    rotaxane = stk.ConstructedMolecule(
        topology_graph=stk.rotaxane.NRotaxane(
            axle=stk.BuildingBlock.init_from_molecule(polymer),
            cycles=(stk.BuildingBlock.init_from_molecule(macrocycle), ),
            repeating_unit='A',
            num_repeating_units=1,
        ),
    )

    for molecule in (polymer, macrocycle, rotaxane):
        molecule_store.put(molecule)
    bond_counts.put(rotaxane, rotaxane.get_num_bonds())