def testMolpherMol(self):
    """
    Exercise the basic ``MolpherMol`` API, tree ownership and atom locking.

    Covers, in order:

    * conversion to an RDKit molecule / MOL block and SMILES assignment,
    * independence of a copy from its original,
    * creation of a tree from SMILES and ``MolpherMol`` instances,
    * the ``RuntimeError`` raised when the SMILES of a tree-owned molecule
      is reassigned,
    * lock information on ``self.cymene_locked`` and its preservation across
      an RDKit -> SDF file -> ``MolpherMol`` round trip and across direct
      construction from the RDKit molecule.
    """
    mol = MolpherMol(self.test_target)
    self.assertTrue(mol.asRDMol())
    self.assertTrue(mol.asMolBlock())
    mol.smiles = 'CCC'
    self.assertEqual(mol.getSMILES(), 'CCC')

    # a copy must accept changes of its own
    mol_copy = mol.copy()
    mol_copy.sascore = 0.54
    self.assertEqual(0.54, mol_copy.sascore)

    # the tree can be created from SMILES strings, molecules or a mix of both
    tree = ExplorationTree.create(source=mol.smiles, target='CCCNCCC')
    tree = ExplorationTree.create(source=mol, target='CCCNCCC')
    tree = ExplorationTree.create(source=mol, target=MolpherMol('CCCNCCC'))
    self.assertTrue(tree.hasMol(mol))

    def assign(x):
        tree.fetchMol(mol.smiles).smiles = x

    # a molecule fetched from the tree must refuse a new SMILES
    self.assertRaises(RuntimeError, assign, 'CCO')

    # atom locking stuff
    mol_locked = MolpherMol(self.cymene_locked)
    open_positions = (0, 2, 3, 9)
    for idx, atom in enumerate(mol_locked.atoms):
        if not atom.is_locked:
            self.assertIn(idx, open_positions)
        else:
            self.assertTrue(atom.lock_info['NO_ADDITION'])
            self.assertFalse(atom.lock_info['UNLOCKED'])
            self.assertFalse(atom.lock_info['FULL_LOCK'])

    # test RDKit conversion and locking information transfer
    rd_mol = mol_locked.asRDMol()
    # the writer gets a byte buffer on Python 2 and a text buffer on Python 3
    if sys.version_info[0] < 3:
        output = BytesIO()
    else:
        output = StringIO()
    writer = Chem.SDWriter(output)
    writer.write(rd_mol)
    writer.close()

    temp_path = self.test_dir + "/cymene_tmp.sdf"
    try:
        with open(temp_path, "w") as sdf_file:
            sdf_file.write(output.getvalue())
        new_cymene = MolpherMol(temp_path)
    finally:
        # remove the temporary file even when writing or parsing fails,
        # so that a failing run does not leave it behind in the test directory
        if os.path.exists(temp_path):
            os.remove(temp_path)
    for atm_old, atm_new in zip(mol_locked.atoms, new_cymene.atoms):
        self.assertEqual(atm_old.locking_mask, atm_new.locking_mask)

    # test init from RDKit
    mol_from_rdkit = MolpherMol(other=rd_mol)
    for atm_old, atm_new in zip(mol_locked.atoms, mol_from_rdkit.atoms):
        self.assertEqual(atm_old.locking_mask, atm_new.locking_mask)
def main():
    """
    Search for a path between the ``cocaine`` and ``procaine`` structures.

    One iteration runs five tree operations in a fixed order (generate, sort,
    custom filter, extend, prune). Iterations repeat until the tree reports
    that a path was found. The leaves are printed after every iteration and
    the molecules on the final path are printed at the end.
    """
    pipeline = [
        GenerateMorphsOper(),
        SortMorphsOper(),
        MyFilterMorphs(),
        ExtendTreeOper(),
        PruneTreeOper(),
    ]

    tree = ETree.create(source=cocaine, target=procaine)
    counter = 0
    while not tree.path_found:
        for operation in pipeline:
            tree.runOperation(operation)
        counter += 1
        print("Iteration", counter)

        # current leaves as (SMILES, distance) pairs, closest to the target first
        leaf_overview = [
            (leaf.getSMILES(), leaf.getDistToTarget())
            for leaf in tree.leaves
        ]
        leaf_overview.sort(key=lambda pair: pair[1])
        print(leaf_overview)

    target_smiles = tree.params['target']
    print("Path found: ")
    for step in tree.fetchPathTo(target_smiles):
        print(step.getSMILES(), step.getDistToTarget())
def testMorphingWithLocks(self):
    """
    Generate two rounds of morphs from ``self.captopril`` and verify them.

    Every morph reported to the collector, together with the operator that
    produced it, is recorded. Afterwards each recorded morph must have a
    SMILES string, a named operator and a substructure match for the
    pattern checked below.
    """
    tree = ExplorationTree.create(source=MolpherMol(self.captopril))

    # (morph, operator) pairs gathered by the collector during both rounds
    collected = []

    def some_collector(morph, operator):
        self.assertTrue(operator.name)
        self.assertTrue(morph.smiles)
        collected.append((morph, operator))

    generate = GenerateMorphsOper(collectors=[some_collector])

    # first round: generate, sort, filter and attach the survivors
    tree.runOperation(generate)
    tree.sortMorphs()
    tree.filterMorphs()
    tree.extend()

    # second round: generate from the new leaves and attach directly
    tree.runOperation(generate)
    tree.extend()

    # check if all generated morphs satisfy some conditions
    locked_pattern = Chem.MolFromSmarts('C(=O)N1CCCC1C(=O)O')
    for morph, operator in collected:
        self.assertTrue(morph.smiles)
        self.assertTrue(operator.name)
        rd_morph = morph.asRDMol()
        self.assertTrue(rd_morph.HasSubstructMatch(locked_pattern))
def testOperations(self):
    """
    Run the built-in tree operations and check their observable effects.

    * an operation called without a tree attached raises ``RuntimeError``,
    * ``FindLeavesOper`` reports the same leaves as the tree itself,
    * ``sortMorphs`` orders candidates by ascending distance to target,
    * ``SortMorphsOper`` with a custom comparator orders them descending,
    * ``CleanMorphsOper`` keeps exactly the candidates selected by the mask,
    * ``TraverseOper`` runs a callback that writes each morph to stdout.
    """
    tree = ExplorationTree.create(tree_data={
        'source': self.test_source,
        'target': self.test_target
    })

    iteration = [
        GenerateMorphsOper(),
        SortMorphsOper(),
        FilterMorphsOper(),
        ExtendTreeOper(),
        PruneTreeOper()
    ]

    # none of the operations has a tree attached yet
    for oper in iteration:
        self.assertRaises(RuntimeError, oper)

    fl = FindLeavesOper()
    for oper in iteration:
        tree.runOperation(oper)
    tree.runOperation(fl)
    for leaf1, leaf2, leaf3 in zip(sorted(fl.leaves), sorted(fl.tree.leaves), sorted(tree.leaves)):
        self.assertTrue(leaf1.smiles == leaf2.smiles == leaf3.smiles)

    tree.generateMorphs()
    tree.sortMorphs()
    # default ordering: distances must not decrease
    previous = None
    for morph in tree.candidates:
        # compare against None explicitly: a distance of 0.0 is a valid
        # previous value and must not switch the check off
        if previous is not None:
            self.assertGreaterEqual(morph.dist_to_target, previous)
        previous = morph.dist_to_target
    print([x.dist_to_target for x in tree.candidates])

    def my_callback(a, b):
        return a.getDistToTarget() > b.getDistToTarget()

    my_sort = SortMorphsOper(tree, my_callback)
    my_sort()
    # custom comparator: distances must not increase
    previous = None
    for morph in tree.candidates:
        if previous is not None:
            self.assertLessEqual(morph.dist_to_target, previous)
        previous = morph.dist_to_target
    print([x.dist_to_target for x in tree.candidates])

    tree.filterMorphs()
    selected = sum(tree.candidates_mask)
    clean_stuff = CleanMorphsOper()
    tree.runOperation(clean_stuff)
    # after cleaning, only the candidates selected by the mask remain
    self.assertEqual(len(tree.candidates), selected)

    tree.extend()

    def callback(x):
        return sys.stdout.write(x.smiles + ' : ' + str(x.dist_to_target) + '\n')

    oper = TraverseOper(callback=callback)
    tree.runOperation(oper)
def __init__(self, settings, operations):
    """
    Create the exploration tree described by `settings` and store `operations`.

    :param settings: object providing the ``source``, ``target``,
        ``tree_params`` and ``max_threads`` attributes read below
    :param operations: stored unchanged as ``self._iteration``
    """
    self.settings = settings
    """a settings class (should be a subclass of `Settings`)"""

    config = self.settings
    tree = ETree.create(source=config.source, target=config.target)
    self.tree = tree
    """:class:`~molpher.core.ExplorationTree.ExplorationTree` used in the search"""

    # custom tree parameters are optional; the thread count is always applied
    custom_params = config.tree_params
    if custom_params:
        tree.params = custom_params
    tree.thread_count = config.max_threads

    self._iteration = operations

    self.path = None
    """a list of SMILES strings if a path was found, `None` otherwise"""
def main(captopril=None):
    """
    Demonstrate a custom ``AddFragment`` morphing operator.

    The operator is first applied directly to a small molecule and the
    resulting SMILES is printed. It is then appended to the morphing
    operators of a tree grown from `captopril`, and the SMILES of the
    generated candidates are printed.

    :param captopril: source molecule for the tree; when falsy, it is loaded
        from ``src/python/molpher/examples/captopril.sdf`` (a relative path,
        so it is resolved against the current working directory)
    """
    start_mol = MolpherMol("CC=O")
    fragment = Chem.MolFromSmiles('c1ccccc1')
    add_fragment = AddFragment(fragment, [1], "Add Benzyl")

    # apply the operator on its own
    add_fragment.setOriginal(start_mol)
    print(add_fragment.morph().smiles)

    if not captopril:
        captopril = MolpherMol("src/python/molpher/examples/captopril.sdf")
    tree = ETree.create(source=captopril)

    # extend the operators of the tree with the custom one
    extended_operators = tree.morphing_operators + (add_fragment,)
    tree.morphing_operators = extended_operators
    print(tree.morphing_operators)

    tree.generateMorphs()
    print([candidate.smiles for candidate in tree.candidates])
def testTree(self):
    """
    Check tree construction, parameter updates and leaf copy semantics.

    * a tree cannot be instantiated directly (``AttributeError``),
    * trees created from a dict, an ``ExplorationData`` instance and plain
      SMILES report the same source and target,
    * changing the source of an existing tree raises ``RuntimeError``,
    * a partial parameter update keeps the remaining parameters intact,
    * a copy of a leaf can be modified without affecting the leaf.
    """
    mol1 = self.test_source
    mol2 = self.test_target
    params_dict = {
        'source': mol1,
        'target': mol2,
        'operators': (OP_ADD_BOND, OP_REMOVE_BOND, OP_MUTATE_ATOM)
    }
    params = ExplorationData(**params_dict)
    expected_operators = params.param_dict['operators']

    self.assertRaises(AttributeError, ExplorationTree)

    tree_from_dict = ExplorationTree.create(tree_data=params_dict)
    tree_from_params = ExplorationTree.create(tree_data=params)
    tree_from_SMILES = ExplorationTree.create(source=mol1, target=mol2)
    for created in (tree_from_dict, tree_from_params, tree_from_SMILES):
        self.assertEqual(created.params['source'], mol1)
        self.assertEqual(created.params['target'], mol2)

    tree = tree_from_params

    # if we try to set source for non-empty tree, exception should be raised
    def replace_source():
        tree.params = {'source': mol2, 'target': 'C'}

    self.assertRaises(RuntimeError, replace_source)

    # update only the target; everything else has to stay as it was
    tree.thread_count = 1
    tree.params = {'target': 'C'}
    self.assertEqual(1, tree.thread_count)
    self.assertEqual(tree.params['source'], mol1)
    self.assertEqual(tree.params['target'], 'C')
    # we should still have the same opers set
    self.assertEqual(tree.params['operators'], expected_operators)

    # assign the original parameters back
    tree.params = params
    tree.thread_count = 0
    self.assertEqual(0, tree.thread_count)
    self.assertEqual(tree.params['source'], mol1)
    self.assertEqual(tree.params['target'], mol2)
    self.assertEqual(tree.params['operators'], expected_operators)

    leaf = tree.leaves[0]
    self.assertRaises(RuntimeError, leaf.setSMILES, 'CCCC')
    self.assertTrue(tree.hasMol(leaf))
    # self.assertEqual(tree, leaf.tree) # FIXME: add a reliable operator for comparison between trees
    leaf.setDistToTarget(0.5)
    self.assertEqual(tree.leaves[0].getDistToTarget(), 0.5)

    leaf_copy = tree.leaves[0].copy()
    # self.assertFalse(tree.hasMol(leaf_copy)) # FIXME: add a reliable operator for comparison between trees (this should check both the SMILES and the tree ownership)
    self.assertEqual(leaf_copy.getDistToTarget(), 0.5)

    # changing the copy must not leak back into the tree
    leaf_copy.setDistToTarget(0.7)
    self.assertEqual(leaf.getDistToTarget(), 0.5)
    self.assertEqual(tree.leaves[0].getDistToTarget(), 0.5)
    self.assertEqual(leaf_copy.getDistToTarget(), 0.7)
from molpher.core import ExplorationTree as ETree
from molpher.algorithms.functions import find_path

# SMILES of the start and end structures of the search
cocaine = 'CN1C2CCC1C(C(=O)OC)C(OC(=O)c1ccccc1)C2'
procaine = 'O=C(OCCN(CC)CC)c1ccc(N)cc1'

# the tree that will be grown from cocaine towards procaine
tree = ETree.create(source=cocaine, target=procaine)

# repeat the five-step iteration until the tree reports a finished path
counter = 0
while True:
    if tree.path_found:
        break
    counter += 1
    print("Iteration", counter)
    tree.generateMorphs()  # produce a new generation of morphs from the current leaves
    tree.sortMorphs()  # order the morphs by their distance to target (ascending)
    tree.filterMorphs()  # drop the molecules that do not meet certain criteria
    tree.extend()  # attach the surviving molecules to the exploration tree
    tree.prune()  # cut the branches of the tree that do not converge

# NOTE(review): `as_mol_grid` is neither defined nor imported in this snippet;
# it has to be provided by the surrounding environment
as_mol_grid(tree.fetchPathTo(tree.params['target']))
from molpher.core import ExplorationTree as ETree
from molpher.algorithms.functions import find_path

cocaine = 'CN1C2CCC1C(C(=O)OC)C(OC(=O)c1ccccc1)C2'
procaine = 'O=C(OCCN(CC)CC)c1ccc(N)cc1'

# grow the tree from cocaine (source) towards procaine (target)
tree = ETree.create(source=cocaine, target=procaine)

# the steps of a single iteration, in the order in which they have to run
_morphing_steps = (
    tree.generateMorphs,  # create a new generation of morphs
    tree.sortMorphs,  # sort them by distance to target (ascending)
    tree.filterMorphs,  # discard molecules that do not meet certain criteria
    tree.extend,  # connect the remaining molecules to the tree
    tree.prune,  # remove branches that do not converge
)

counter = 0
while not tree.path_found:
    counter += 1
    print("Iteration", counter)
    for _step in _morphing_steps:
        _step()

# NOTE(review): `as_mol_grid` is neither defined nor imported in this snippet;
# it has to be provided by the surrounding environment
as_mol_grid(tree.fetchPathTo(tree.params['target']))
def testOperations(self):
    """
    Verify the stock tree operations one by one.

    The test checks that operations without a tree raise ``RuntimeError``,
    that ``FindLeavesOper`` agrees with ``tree.leaves``, that the default and
    the custom sorting produce ascending and descending distances
    respectively, that cleaning keeps only the candidates selected by the
    mask, and that a traversal callback can be run over the tree.
    """
    tree = ExplorationTree.create(tree_data={
        'source': self.test_source,
        'target': self.test_target
    })

    def check_candidate_order(in_order):
        """Assert ``in_order(previous, current)`` for each pair of neighbouring candidates."""
        previous = None
        for morph in tree.candidates:
            current = morph.dist_to_target
            # `previous is not None` instead of plain truthiness: a distance
            # of 0.0 is falsy and would otherwise skip the comparison
            if previous is not None:
                self.assertTrue(in_order(previous, current))
            previous = current

    iteration = [
        GenerateMorphsOper(),
        SortMorphsOper(),
        FilterMorphsOper(),
        ExtendTreeOper(),
        PruneTreeOper()
    ]

    # operations that are not attached to a tree must refuse to run
    for oper in iteration:
        self.assertRaises(RuntimeError, lambda: oper())

    fl = FindLeavesOper()
    for oper in iteration:
        tree.runOperation(oper)
    tree.runOperation(fl)
    for leaf1, leaf2, leaf3 in zip(sorted(fl.leaves), sorted(fl.tree.leaves), sorted(tree.leaves)):
        self.assertTrue(leaf1.smiles == leaf2.smiles == leaf3.smiles)

    # default sort: ascending distance to target
    tree.generateMorphs()
    tree.sortMorphs()
    check_candidate_order(lambda before, after: after >= before)
    print([x.dist_to_target for x in tree.candidates])

    # custom comparator: descending distance to target
    my_callback = lambda a, b: a.getDistToTarget() > b.getDistToTarget()
    my_sort = SortMorphsOper(tree, my_callback)
    my_sort()
    check_candidate_order(lambda before, after: after <= before)
    print([x.dist_to_target for x in tree.candidates])

    # cleaning must leave exactly the candidates selected by the mask
    tree.filterMorphs()
    selected = sum(tree.candidates_mask)
    clean_stuff = CleanMorphsOper()
    tree.runOperation(clean_stuff)
    self.assertEqual(len(tree.candidates), selected)

    tree.extend()

    callback = lambda x: sys.stdout.write(x.smiles + ' : ' + str(x.dist_to_target) + '\n')
    oper = TraverseOper(callback=callback)
    tree.runOperation(oper)
def testMorphing(self):
    """
    Run complete morphing iterations until a path to the target is found.

    Each iteration is wrapped in a custom ``TreeOperation`` that generates,
    sorts, filters, attaches and prunes morphs and then traverses the tree
    with a callback that counts the morphs and remembers the one closest to
    the target. When the path is found, a subtree is deleted and the state of
    the removed molecules is verified, together with the molecules gathered
    by the collector and the descendants of every remaining morph.

    NOTE(review): the main loop has no iteration limit, so the test does not
    terminate if a path is never found.
    """

    def callback(morph):
        callback.morphs_in_tree += 1
        self.assertTrue(morph)
        self.assertTrue(morph.tree)
        if morph.getItersWithoutDistImprovement() > 3:
            print('Callback output:')
            print(morph.getSMILES(), morph.getItersWithoutDistImprovement(), morph.getDistToTarget())
        if not callback.closest_mol:
            callback.closest_mol = morph
        current_dist = morph.getDistToTarget()
        min_dist = callback.closest_mol.getDistToTarget()
        if min_dist > current_dist:
            callback.closest_mol = morph

    # traversal state is kept as attributes of the callback function itself
    callback.morphs_in_tree = 0
    callback.closest_mol = None

    all_bad_structures = []

    def collect_nonsyntetizable(morph, operator):
        if morph.sascore > 6:
            all_bad_structures.append(morph)

    class MorphingIteration(TreeOperation):
        """One full morphing iteration performed on the tree given at construction."""

        # the enclosing test case, used for assertions inside the operation
        parent = self

        def __init__(self, tree):
            super(MorphingIteration, self).__init__()
            self._tree = tree

        def __call__(self):
            print('Iteration: ', self._tree.getGenerationCount() + 1)
            self._tree.generateMorphs([collect_nonsyntetizable])
            # candidates are not attached to any tree yet
            for mol in self._tree.candidates:
                self.parent.assertEqual(None, mol.tree)
            self._tree.sortMorphs()
            self._tree.filterMorphs()
            self._tree.extend()
            self._tree.prune()
            # recount the morphs from scratch on every traversal
            callback.morphs_in_tree = 0
            self._tree.traverse(callback)
            print('Number of morphs in the tree: ', callback.morphs_in_tree)
            print('Closest molecule to target: {0} -- distance: {1}'.format(
                callback.closest_mol.getSMILES(),
                callback.closest_mol.getDistToTarget()))

        def getTree(self):
            return self._tree

        def setTree(self, tree):
            self._tree = tree

    tree = ExplorationTree.create(
        tree_data={
            'source': self.test_source,
            'target': self.test_target
            # , 'threads' : 1
        })

    iterate = MorphingIteration(tree)
    counter = 0
    while True:
        iterate()
        counter += 1
        if tree.path_found:
            target = tree.fetchMol(self.test_target)
            # a unittest assertion instead of a bare `assert`, which is
            # removed when Python runs with -O
            self.assertTrue(target)
            print("Path found after {0} iterations:".format(counter))
            path = self.getPathToMol(tree, target)
            pprint([(x.smiles, x.dist_to_target, x.parent_operator) for x in path])
            break

    # deleting the subtree of the parent removes the parent and the child
    child = tree.leaves[0]
    self.assertTrue(child.tree)
    self.assertTrue(tree.hasMol(child))
    parent = child.getParentSMILES()
    tree.deleteSubtree(parent)
    self.assertFalse(tree.hasMol(parent))
    self.assertFalse(tree.hasMol(child))
    self.assertEqual(None, child.tree)
    self.assertEqual(parent, child.getParentSMILES())

    # check if valid molecules were extracted
    self.assertTrue(len(all_bad_structures) > 0)
    for mol in all_bad_structures:
        self.assertTrue(mol.smiles)

    # check descendents
    def check_descs(morph):
        for desc_smiles in morph.descendents:
            desc = tree.fetchMol(desc_smiles)
            self.assertTrue(desc.tree)

    tree.traverse(check_descs)
def testMorphing(self):
    """
    Morph from ``self.test_source`` to ``self.test_target`` and inspect the tree.

    The test defines a traversal callback (counting morphs and tracking the
    closest molecule), a collector for molecules with ``sascore > 6`` and a
    ``TreeOperation`` subclass performing one full iteration. Iterations are
    repeated until ``tree.path_found`` is set. Afterwards subtree deletion,
    the collected molecules and the descendants of all morphs are checked.

    NOTE(review): there is no upper bound on the number of iterations; the
    loop only ends once a path is found.
    """

    def callback(morph):
        callback.morphs_in_tree += 1
        self.assertTrue(morph)
        self.assertTrue(morph.tree)
        if morph.getItersWithoutDistImprovement() > 3:
            print('Callback output:')
            print(morph.getSMILES(), morph.getItersWithoutDistImprovement(), morph.getDistToTarget())
        if not callback.closest_mol:
            callback.closest_mol = morph
        current_dist = morph.getDistToTarget()
        min_dist = callback.closest_mol.getDistToTarget()
        if min_dist > current_dist:
            callback.closest_mol = morph

    # function attributes serve as the mutable state shared between calls
    callback.morphs_in_tree = 0
    callback.closest_mol = None

    all_bad_structures = []

    def collect_nonsyntetizable(morph, operator):
        if morph.sascore > 6:
            all_bad_structures.append(morph)

    class MorphingIteration(TreeOperation):
        """Tree operation that performs a single complete morphing iteration."""

        # gives the operation access to the assertions of the test case
        parent = self

        def __init__(self, tree):
            super(MorphingIteration, self).__init__()
            self._tree = tree

        def __call__(self):
            print('Iteration: ', self._tree.getGenerationCount() + 1)
            self._tree.generateMorphs([collect_nonsyntetizable])
            # freshly generated candidates do not belong to a tree
            for mol in self._tree.candidates:
                self.parent.assertEqual(None, mol.tree)
            self._tree.sortMorphs()
            self._tree.filterMorphs()
            self._tree.extend()
            self._tree.prune()
            # the counter is reset so that it reflects this traversal only
            callback.morphs_in_tree = 0
            self._tree.traverse(callback)
            print('Number of morphs in the tree: ', callback.morphs_in_tree)
            print('Closest molecule to target: {0} -- distance: {1}'.format(
                callback.closest_mol.getSMILES()
                , callback.closest_mol.getDistToTarget()
            ))

        def getTree(self):
            return self._tree

        def setTree(self, tree):
            self._tree = tree

    tree = ExplorationTree.create(tree_data={
        'source' : self.test_source
        , 'target' : self.test_target
        # , 'threads' : 1
    })

    iterate = MorphingIteration(tree)
    counter = 0
    while True:
        iterate()
        counter += 1
        if tree.path_found:
            target = tree.fetchMol(self.test_target)
            # use the test case assertion: a bare `assert` statement is
            # skipped entirely when the interpreter runs with -O
            self.assertTrue(target)
            print("Path found after {0} iterations:".format(counter))
            path = self.getPathToMol(tree, target)
            pprint([(x.smiles, x.dist_to_target, x.parent_operator) for x in path])
            break

    # remove the subtree rooted at the parent of the first leaf
    child = tree.leaves[0]
    self.assertTrue(child.tree)
    self.assertTrue(tree.hasMol(child))
    parent = child.getParentSMILES()
    tree.deleteSubtree(parent)
    self.assertFalse(tree.hasMol(parent))
    self.assertFalse(tree.hasMol(child))
    # the removed child is detached but still remembers its parent
    self.assertEqual(None, child.tree)
    self.assertEqual(parent, child.getParentSMILES())

    # check if valid molecules were extracted
    self.assertTrue(len(all_bad_structures) > 0)
    for mol in all_bad_structures:
        self.assertTrue(mol.smiles)

    # check descendents
    def check_descs(morph):
        for desc_smiles in morph.descendents:
            desc = tree.fetchMol(desc_smiles)
            self.assertTrue(desc.tree)

    tree.traverse(check_descs)
def testTree(self):
    """
    Test the ways of creating a tree and of changing its parameters.

    The tree is created from a dictionary, from ``ExplorationData`` and from
    SMILES strings; all variants must expose the same source and target.
    The test then checks that the source cannot be replaced, that a partial
    update preserves the other parameters, and that a leaf copy is
    independent of the leaf stored in the tree.
    """
    mol1 = self.test_source
    mol2 = self.test_target

    def assert_endpoints(checked_tree, source, target):
        self.assertEqual(checked_tree.params['source'], source)
        self.assertEqual(checked_tree.params['target'], target)

    params_dict = {
        'source' : mol1
        , 'target' : mol2
        , 'operators' : (OP_ADD_BOND, OP_REMOVE_BOND, OP_MUTATE_ATOM)
    }
    params = ExplorationData(**params_dict)

    # direct instantiation is not supported
    self.assertRaises(AttributeError, lambda : ExplorationTree())

    tree_from_dict = ExplorationTree.create(tree_data=params_dict)
    tree_from_params = ExplorationTree.create(tree_data=params)
    tree_from_SMILES = ExplorationTree.create(source=mol1, target=mol2)
    assert_endpoints(tree_from_dict, mol1, mol2)
    assert_endpoints(tree_from_params, mol1, mol2)
    assert_endpoints(tree_from_SMILES, mol1, mol2)

    tree = tree_from_params

    # if we try to set source for non-empty tree, exception should be raised
    def func():
        tree.params = {
            'source' : mol2
            , 'target' : 'C'
        }

    self.assertRaises(RuntimeError, func)

    tree.thread_count = 1
    tree.params = {
        'target' : 'C'
    }
    self.assertEqual(1, tree.thread_count)
    assert_endpoints(tree, mol1, 'C')
    # we should still have the same opers set
    self.assertEqual(tree.params['operators'], params.param_dict['operators'])

    # assign the original parameters back
    tree.params = params
    tree.thread_count = 0
    self.assertEqual(0, tree.thread_count)
    assert_endpoints(tree, mol1, mol2)
    self.assertEqual(tree.params['operators'], params.param_dict['operators'])

    # a molecule owned by the tree must refuse a new SMILES
    leaf = tree.leaves[0]
    self.assertRaises(RuntimeError, lambda : leaf.setSMILES('CCCC'))
    self.assertTrue(tree.hasMol(leaf))
    # self.assertEqual(tree, leaf.tree) # FIXME: add a reliable operator for comparison between trees
    leaf.setDistToTarget(0.5)
    self.assertEqual(tree.leaves[0].getDistToTarget(), 0.5)

    # the copy starts with the values of the leaf and then diverges
    leaf_copy = tree.leaves[0].copy()
    # self.assertFalse(tree.hasMol(leaf_copy)) # FIXME: add a reliable operator for comparison between trees (this should check both the SMILES and the tree ownership)
    self.assertEqual(leaf_copy.getDistToTarget(), 0.5)
    leaf_copy.setDistToTarget(0.7)
    self.assertEqual(leaf.getDistToTarget(), 0.5)
    self.assertEqual(tree.leaves[0].getDistToTarget(), 0.5)
    self.assertEqual(leaf_copy.getDistToTarget(), 0.7)
def main():
    """
    Walk through the exploration tree API step by step, printing as it goes.

    The sections, each announced by a printed ``#...`` header, are:
    generating and manipulating morphs, sorting and filtering, extending and
    pruning, custom tree operations, tree traversal, and tree snapshots.

    Side effects: the snapshot section reads ``cocaine-procaine-template.xml``
    from the directory of this file and writes ``snapshot.xml`` to the
    current working directory.
    """
    cocaine = 'CN1[C@H]2CC[C@@H]1[C@@H](C(=O)OC)[C@@H](OC(=O)c1ccccc1)C2'
    procaine = 'O=C(OCCN(CC)CC)c1ccc(N)cc1'
    tree = ETree.create(
        source=cocaine, target=procaine
    )  # initialize a tree that searches for a path from cocaine to procaine

    # print the smiles of the source and target molecule
    print('Source: ', tree.params['source'])
    print('Target: ', tree.params['target'])

    # change selected parameters using a dictionary
    print(tree.params)
    tree.params = {'non_producing_survive': 2, 'weight_max': 500.0}
    print(tree.params)

    print('\n#Generating and Manipulating Morphs')
    print(tree.leaves)  # show the current leaves of the tree (only the source so far)
    print(tree.leaves[0].smiles)
    tree.generateMorphs()  # generate new morphs
    print(tree.candidates)
    print(len(tree.candidates))
    print()

    # get the first morph in the candidate list
    candidate = tree.candidates[0]
    # print distance to target
    print(tree.candidates[0].dist_to_target)
    # set new distance to target
    candidate.dist_to_target = 0.5
    # look in the list of candidates and print new distance
    print(tree.candidates[0].dist_to_target)
    print()

    # make a copy of our molecule
    candidate_copy = candidate.copy()
    # set a new distance for the copy and verify that the original was not affected
    print(candidate_copy.dist_to_target)
    candidate_copy.dist_to_target = 0.7
    print(candidate_copy.dist_to_target)
    print(candidate.dist_to_target)
    print(tree.candidates[0].dist_to_target)

    print('\n#Sorting and Filtering Morphs')
    # sort the candidates in the tree according to their distance from target
    tree.sortMorphs()
    # show results
    print(tree.candidates_mask)
    print([(x.smiles, x.dist_to_target) for x in tree.candidates])
    print()

    # print the current candidates mask (all positions are on by default)
    print(tree.candidates_mask)
    # accept only the first three morphs in the sorted list (those with the lowest distance to target)
    # NOTE(review): the fixed indices 0-2 require at least three candidates;
    # a shorter mask would raise IndexError here
    mask = [False for x in tree.candidates_mask]
    mask[0] = True
    mask[1] = True
    mask[2] = True
    # save the new mask to the tree
    tree.candidates_mask = mask
    # show results
    print(tree.candidates_mask)
    print([
        (x.smiles, x.dist_to_target)
        for idx, x in enumerate(tree.candidates)
        if tree.candidates_mask[idx]  # get accepted molecules only
    ])

    print('\n#Extending and Pruning')
    # get the number of generations before
    print(tree.generation_count)
    tree.extend()  # connect the accepted morphs to the tree as new leaves
    print(
        sorted(  # grab the new leaves as a list sorted according to their distance from target
            [(x.getSMILES(), x.getDistToTarget()) for x in tree.leaves],
            key=lambda item: item[1]))
    # get the number of generations after
    print(tree.generation_count)
    # check if a path was found
    print(tree.path_found)
    # run the pruning operation on the updated tree
    tree.prune()

    print('\n#Operations')

    class MyFilterMorphs(TreeOperation):
        """
        A custom tree operation that accepts
        only the first three morphs
        (those with the lowest distance to target).
        """

        def __call__(self):
            """
            This method is called automatically by the tree.
            The tree this operation is being run on is accessible
            from `self.tree`.
            """
            # same fixed-size mask as in the manual filtering step above
            mask = [False for x in self.tree.candidates_mask]
            mask[0] = True
            mask[1] = True
            mask[2] = True
            self.tree.candidates_mask = mask

    tree = ETree.create(source=cocaine, target=procaine)  # create the tree

    # this list of tree operations defines one iteration
    iteration = [
        GenerateMorphsOper(),
        SortMorphsOper(),
        MyFilterMorphs(),
        ExtendTreeOper(),
        PruneTreeOper()
    ]

    # apply the operations in the list one by one
    for oper in iteration:
        tree.runOperation(oper)

    # observe the results
    print(tree.generation_count)
    print(tree.path_found)
    print(
        sorted(  # grab the new leaves as a list sorted according to their distance from target
            [(x.getSMILES(), x.getDistToTarget()) for x in tree.leaves],
            key=lambda x: x[1]))

    print('\n#Traversing the Tree')

    class MyCallback(TraverseCallback):
        """
        This callback just prints some information about the molecules in the tree.
        """

        def __call__(self, morph):
            """
            Method called on each morph in the tree
            -- starting from root to leaves.
            """
            # a morph without a parent SMILES is reported as the root
            if not morph.getParentSMILES():
                print("# Root #")
            else:
                print('# Morph #')
                print('Parent:', morph.getParentSMILES())
            print('SMILES: ', morph.getSMILES())
            print('Descendents: ', morph.getDescendants())

    callback = MyCallback()  # initialize a callback
    traverse = TraverseOper(
        callback=callback)  # attach it to a tree traversal operation
    tree.runOperation(traverse)  # run the operation
    print()

    def process(morph):
        """
        Prints some information about the molecules in the tree.
        """
        # prints the same report as MyCallback, written as a plain function
        if not morph.getParentSMILES():
            print("# Root #")
        else:
            print('# Morph #')
            print('Parent:', morph.getParentSMILES())
        print('SMILES: ', morph.getSMILES())
        print('Descendents: ', morph.getDescendants())

    tree.traverse(
        process)  # use the traverse method to run the callback function

    print('\n#Tree Snapshots')
    template_file = 'cocaine-procaine-template.xml'
    import os
    # resolve the template relative to this file, not the working directory
    template_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 template_file)

    # create a tree from the template file
    tree = ETree.create(template_file)
    print(tree.params)

    # apply the tree operations
    for oper in iteration:
        tree.runOperation(oper)
    print(
        sorted(  # grab the new leaves as a list sorted according to their distance from target
            [(x.getSMILES(), x.getDistToTarget()) for x in tree.leaves],
            key=lambda x: x[1]))

    # save the tree in a snapshot file
    tree.save('snapshot.xml')
    new_tree = ETree.create(
        'snapshot.xml')  # create a new tree from the saved snapshot
    print(new_tree.params)
    print(
        sorted(  # grab the leaves in the created tree (these should be the same as those in the original tree)
            [(x.getSMILES(), x.getDistToTarget()) for x in new_tree.leaves],
            key=lambda x: x[1]))