def test_at_least_something_matches_every_negative_example(self):
    """Check that every negative example is hit by at least one rule pattern.

    Collects every non-empty NEGATIVE_EXAMPLE SMILES and every non-empty
    SMARTS pattern from GetHeterocycleReactionSmarts(), then asserts that
    each negative-example molecule has a substructure match against at
    least one of the collected patterns.
    """
    decoys = []
    patterns = []
    for rule in GetHeterocycleReactionSmarts():
        # Negative examples are gathered even from rows that carry no SMARTS.
        for smi in rule.NEGATIVE_EXAMPLE.split(','):
            if smi:
                decoy = Chem.MolFromSmiles(smi)
                assert decoy is not None
                decoys.append(decoy)

        if rule.SMARTS:
            pattern = Chem.MolFromSmarts(rule.SMARTS)
            assert pattern is not None
            patterns.append(pattern)

    for decoy in decoys:
        # any() stops at the first matching pattern, like the explicit break.
        matched = any(decoy.HasSubstructMatch(pattern) for pattern in patterns)
        assert matched, ('nothing matched %s' % Chem.MolToSmiles(decoy, isomericSmiles=True))
def get_six_member_ring_carbon_to_nitrogen_reaction(self):
    """Return the single rule row describing the 6-membered-ring aromatic carbon.

    Scans GetHeterocycleReactionSmarts() for rows whose DESCRIPTION starts
    with 'aromatic carbon in 6 membered ring' and returns the only match.

    Raises:
        AssertionError: if the number of matching rows is not exactly one.
    """
    prefix = 'aromatic carbon in 6 membered ring'
    matching = []
    for rule in GetHeterocycleReactionSmarts():
        if rule.DESCRIPTION.startswith(prefix):
            matching.append(rule)
    assert len(matching) == 1, "expecting only one of these rules for now"
    return matching[0]
def test_reactions_modify_examples(self):
    """Run every rule's reaction(s) on that rule's positive examples.

    For each row of GetHeterocycleReactionSmarts() that has a SMARTS, one
    reaction '<SMARTS>>><product>' is built per comma-separated CONVERT_TO
    entry and run on every comma-separated EXAMPLE SMILES. The canonical
    SMILES of the example and the isomeric SMILES of the first molecule of
    each product set are handed to assert_valid_change().

    Raises:
        AssertionError: if an EXAMPLE entry is empty or cannot be parsed
            (and presumably from assert_valid_change() when a product is
            not an acceptable change -- that helper is defined elsewhere).
    """
    for row in GetHeterocycleReactionSmarts():
        smarts = row.SMARTS
        if not smarts:
            continue
        for product in row.CONVERT_TO.split(','):
            reaction = smarts + '>>' + product
            rxn = AllChem.ReactionFromSmarts(reaction)
            for smiles in row.EXAMPLE.split(','):
                # Validate the example *before* it is used; previously the
                # emptiness check ran only after CanonSmiles had consumed it.
                assert smiles, "empty EXAMPLE entry for rule %s" % smarts
                mol = Chem.MolFromSmiles(smiles)
                # Same None check the negative-example test performs, so bad
                # data fails with a readable message.
                assert mol is not None, "could not parse EXAMPLE %s" % smiles
                orig_can_smi = Chem.CanonSmiles(smiles)
                for product_set in rxn.RunReactants((mol, )):
                    # Only the first molecule of each product set is checked.
                    isosmi = Chem.MolToSmiles(product_set[0], isomericSmiles=True)
                    assert_valid_change(orig_can_smi, isosmi)
def test_smarts_match_examples(self):
    """Check each rule's SMARTS against its positive and negative examples.

    For every row of GetHeterocycleReactionSmarts() with a non-empty SMARTS:
      * every comma-separated EXAMPLE SMILES must be non-empty and must
        contain a substructure match for the SMARTS;
      * every non-empty comma-separated NEGATIVE_EXAMPLE SMILES must NOT
        contain a substructure match for the SMARTS.

    Raises:
        AssertionError: on any violated expectation, or when a SMARTS or
            SMILES string cannot be parsed.
    """
    for row in GetHeterocycleReactionSmarts():
        smarts = row.SMARTS
        if not smarts:
            continue

        substruct = Chem.MolFromSmarts(smarts)
        # Fail with a clear message instead of an error deep inside
        # HasSubstructMatch when the pattern itself is malformed.
        assert substruct is not None, "could not parse SMARTS %s" % smarts

        for smiles in row.EXAMPLE.split(','):
            assert smiles, "empty EXAMPLE entry for rule %s" % smarts
            mol = Chem.MolFromSmiles(smiles)
            assert mol is not None, "could not parse EXAMPLE %s" % smiles
            assert mol.HasSubstructMatch(substruct), "%s not in %s" % (smarts, smiles)

        for smiles in row.NEGATIVE_EXAMPLE.split(','):
            # Rows without negative examples yield a single empty string.
            if not smiles:
                continue
            mol = Chem.MolFromSmiles(smiles)
            assert mol is not None, "could not parse NEGATIVE_EXAMPLE %s" % smiles
            assert not mol.HasSubstructMatch(substruct), (
                "%s should not be in %s" % (smarts, smiles))
def test_apply_every_rule_to_every_fragment(self):
    """Apply every heterocycle rule to every fragment of the test library.

    Reads the 'SMILES' column of test_data/fragments.csv (next to this
    file), runs each (rule, reaction) pair from
    GetHeterocycleReactionSmarts() / GetHeterocycleReactions() on each
    fragment, and passes every unique product to assert_valid_change().

    Side effects (files written to the current working directory):
      * hetero_atom_mutations.csv -- one row per product
        (SMILES, MUTATED, REACTION, DESCRIPTION);
      * not_changed.csv -- fragments for which has_aromatic() is true
        but no rule produced any product (SMILES, TITLE).

    Raises:
        AssertionError: if a fragment SMILES cannot be parsed (and
            presumably from assert_valid_change() for a bad product --
            that helper is defined elsewhere).
    """
    fragment_library = os.path.join(os.path.dirname(__file__), 'test_data', 'fragments.csv')

    # Context managers guarantee the three handles are flushed and closed,
    # even when an assertion fails part-way through the library.
    with open('hetero_atom_mutations.csv', 'w') as mutations_file, \
            open('not_changed.csv', 'w') as notchanged_file, \
            open(fragment_library) as fragments_file:
        writer = csv.DictWriter(mutations_file,
                                ['SMILES', 'MUTATED', 'REACTION', 'DESCRIPTION'])
        writer.writeheader()

        notchanged = csv.DictWriter(notchanged_file, ['SMILES', 'TITLE'])
        notchanged.writeheader()

        for frag_row in csv.DictReader(fragments_file):
            frag_smiles = frag_row['SMILES']
            rdkit_mol = Chem.MolFromSmiles(frag_smiles)
            assert rdkit_mol is not None, "could not parse fragment %s" % frag_smiles
            orig_can_smi = Chem.MolToSmiles(rdkit_mol, isomericSmiles=True)

            changed = False
            for src, rxn in zip(GetHeterocycleReactionSmarts(), GetHeterocycleReactions()):
                for mutated in get_unique_products(rxn, rdkit_mol):
                    assert_valid_change(orig_can_smi, mutated)
                    writer.writerow({
                        'SMILES': orig_can_smi,
                        'MUTATED': mutated,
                        'REACTION': src.SMARTS + '>>' + src.CONVERT_TO,
                        'DESCRIPTION': src.DESCRIPTION,
                    })
                    changed = True

            # record aromatic fragments that no rule changes (possible problems?)
            if not changed and has_aromatic(rdkit_mol):
                notchanged.writerow({'SMILES': orig_can_smi, 'TITLE': orig_can_smi})
def test_fuzz_atom_mutations(self):
    """Repeatedly mutate randomly chosen fragments with every rule.

    Seeds a work list with the canonical SMILES of every fragment in
    test_data/fragments.csv for which has_aromatic() is true. On each
    trial one SMILES is popped at a random index (Random seeded with
    0xDEADBEEF), every (rule, reaction) pair is applied to it, each unique
    product is passed to assert_valid_change(), and products not seen
    before are appended to the work list for later trials.

    Side effects: prints progress and a summary, and writes two CSV files
    to the current working directory, one with each newly seen mutation
    and one with fragments that no rule changed.

    Raises:
        AssertionError: if a library fragment cannot be parsed or occurs
            twice (and presumably from assert_valid_change() for a bad
            product -- that helper is defined elsewhere).
    """
    fragment_library = os.path.join(os.path.dirname(__file__), 'test_data', 'fragments.csv')
    # NOTE(review): `base` is joined to the suffixes below with no separator,
    # so the outputs are named e.g. 'fragmentshetero_atom_mutations_fuzzing.csv'.
    # Kept as-is to avoid renaming existing output files.
    base = os.path.splitext(os.path.basename(fragment_library))[0]
    rand = Random(0xDEADBEEF)

    uniq_fragments = set()
    fragments = []
    # `with` closes the library file as soon as the seed list is built.
    with open(fragment_library) as fragments_file:
        for frag_row in csv.DictReader(fragments_file):
            frag_smiles = frag_row['SMILES']
            rdkit_mol = Chem.MolFromSmiles(frag_smiles)
            assert rdkit_mol is not None, "could not parse fragment %s" % frag_smiles
            if not has_aromatic(rdkit_mol):
                continue
            orig_can_smi = Chem.MolToSmiles(rdkit_mol, isomericSmiles=True)
            assert orig_can_smi not in uniq_fragments
            uniq_fragments.add(orig_can_smi)
            fragments.append(orig_can_smi)
    print(len(fragments), "fragments with aromaticity")

    # Raise to e.g. 1000000 to test the full range of possible fragments.
    num_trials = 1000
    trials_run = 0
    total_generated = 0
    uniq_notchanged = set()

    # Context managers make sure both outputs are flushed and closed even
    # when an assertion aborts the fuzzing loop.
    with open(base + 'hetero_atom_mutations_fuzzing.csv', 'w') as mutations_file, \
            open(base + 'not_changed_during_fuzzing.csv', 'w') as notchanged_file:
        writer = csv.DictWriter(mutations_file,
                                ['SMILES', 'MUTATED', 'REACTION', 'DESCRIPTION'])
        writer.writeheader()

        notchanged = csv.DictWriter(notchanged_file, ['SMILES', 'TITLE'])
        notchanged.writeheader()

        for i in range(num_trials):
            if i and i % 1000 == 0:
                print(i)
            if not fragments:
                print("Converged! No more fragments left!")
                break
            trials_run += 1

            idx = rand.randint(0, len(fragments) - 1)
            orig_can_smi = fragments.pop(idx)
            rdkit_mol = Chem.MolFromSmiles(orig_can_smi)

            changed = False
            for src, rxn in zip(GetHeterocycleReactionSmarts(), GetHeterocycleReactions()):
                for mutated in get_unique_products(rxn, rdkit_mol):
                    total_generated += 1
                    changed = True
                    assert_valid_change(orig_can_smi, mutated)
                    # Only never-seen products are queued and recorded.
                    if mutated in uniq_fragments:
                        continue
                    uniq_fragments.add(mutated)
                    fragments.append(mutated)
                    writer.writerow({
                        'SMILES': orig_can_smi,
                        'MUTATED': mutated,
                        'REACTION': src.SMARTS + '>>' + src.CONVERT_TO,
                        'DESCRIPTION': src.DESCRIPTION,
                    })

            # record aromatic fragments that no rule changes (possible problems?)
            if not changed and orig_can_smi not in uniq_notchanged:
                uniq_notchanged.add(orig_can_smi)
                notchanged.writerow({'SMILES': orig_can_smi, 'TITLE': orig_can_smi})

    # Report the trials actually executed; the loop may stop early once the
    # work list is exhausted.
    print(total_generated, "generated of which", len(uniq_fragments),
          "are unique fragments generated after", trials_run, "trials")