Пример #1
0
    def test_at_least_something_matches_every_negative_example(self):
        """Check that every negative example is hit by at least one SMARTS.

        Walks the rows returned by ``GetHeterocycleReactionSmarts()``,
        collecting the molecules parsed from each row's comma-separated
        ``NEGATIVE_EXAMPLE`` field and the query parsed from each non-empty
        ``SMARTS`` field. Every collected molecule must then have a
        substructure match against at least one of the collected queries.
        """
        decoys = []
        patterns = []
        for rule in GetHeterocycleReactionSmarts():
            # Negative examples are gathered before the SMARTS check, so rows
            # without a SMARTS still contribute their examples.
            for smi in filter(None, rule.NEGATIVE_EXAMPLE.split(',')):
                parsed = Chem.MolFromSmiles(smi)
                assert parsed is not None
                decoys.append(parsed)

            smarts_text = rule.SMARTS
            if smarts_text:
                pattern = Chem.MolFromSmarts(smarts_text)
                assert pattern is not None
                patterns.append(pattern)

        for decoy in decoys:
            matched = any(
                decoy.HasSubstructMatch(pattern) for pattern in patterns)
            assert matched, ('nothing matched %s' %
                             Chem.MolToSmiles(decoy, isomericSmiles=True))
Пример #2
0
 def get_six_member_ring_carbon_to_nitrogen_reaction(self):
      """Return the one rule described as 'aromatic carbon in 6 membered ring'.

      Scans ``GetHeterocycleReactionSmarts()`` for rows whose ``DESCRIPTION``
      starts with that text and asserts that exactly one such row exists.
      """
      prefix = 'aromatic carbon in 6 membered ring'
      matching = []
      for rule in GetHeterocycleReactionSmarts():
          if rule.DESCRIPTION.startswith(prefix):
              matching.append(rule)
      assert len(matching) == 1, "expecting only one of these rules for now"
      return matching[0]
Пример #3
0
    def test_reactions_modify_examples(self):
        """Run every rule's reactions on its examples and validate the output.

        For each row of ``GetHeterocycleReactionSmarts()`` that has a SMARTS,
        one reaction is built per comma-separated ``CONVERT_TO`` product
        (``SMARTS>>product``). Each reaction is run on every comma-separated
        ``EXAMPLE`` molecule, and the first product of every result is passed,
        as an isomeric SMILES, to ``assert_valid_change`` together with the
        canonical SMILES of the example.
        """
        for row in GetHeterocycleReactionSmarts():
            smarts = row.SMARTS
            if not smarts:
                continue

            # Parse and canonicalise the examples once per rule instead of
            # once per product; validate each SMILES before it is used so a
            # bad table entry fails with a readable message.
            examples = []
            for smiles in row.EXAMPLE.split(','):
                assert smiles, 'empty example SMILES for %s' % smarts
                mol = Chem.MolFromSmiles(smiles)
                assert mol is not None, (
                    'could not parse example %s' % smiles)
                examples.append((Chem.CanonSmiles(smiles), mol))

            for product in row.CONVERT_TO.split(','):
                rxn = AllChem.ReactionFromSmarts(smarts + '>>' + product)

                for orig_can_smi, mol in examples:
                    for products in rxn.RunReactants((mol, )):
                        # Only the first product of each result is checked.
                        isosmi = Chem.MolToSmiles(products[0],
                                                  isomericSmiles=True)
                        assert_valid_change(orig_can_smi, isosmi)
Пример #4
0
    def test_smarts_match_examples(self):
        """Check each rule's SMARTS against its positive and negative examples.

        For every row of ``GetHeterocycleReactionSmarts()`` with a SMARTS, each
        comma-separated ``EXAMPLE`` SMILES must contain the SMARTS as a
        substructure and each non-empty ``NEGATIVE_EXAMPLE`` SMILES must not.
        """
        for row in GetHeterocycleReactionSmarts():
            smarts = row.SMARTS
            if not smarts:
                continue

            substruct = Chem.MolFromSmarts(smarts)
            # Fail with a readable message rather than an error on None below.
            assert substruct is not None, "could not parse SMARTS %s" % smarts

            for smiles in row.EXAMPLE.split(','):
                assert smiles
                mol = Chem.MolFromSmiles(smiles)
                assert mol is not None, "could not parse SMILES %s" % smiles
                assert mol.HasSubstructMatch(
                    substruct), "%s not in %s" % (smarts, smiles)

            for smiles in row.NEGATIVE_EXAMPLE.split(','):
                # An empty field means the rule has no negative examples.
                if not smiles:
                    continue

                mol = Chem.MolFromSmiles(smiles)
                assert mol is not None, "could not parse SMILES %s" % smiles
                assert not mol.HasSubstructMatch(
                    substruct), "%s should not be in %s" % (smarts, smiles)
Пример #5
0
    def test_apply_every_rule_to_every_fragment(self):
        """Apply every heterocycle rule to every fragment in the test library.

        Reads ``test_data/fragments.csv`` (next to this file) and, for each
        fragment, runs every reaction from ``GetHeterocycleReactions()``. Each
        unique product is checked with ``assert_valid_change`` and written to
        ``hetero_atom_mutations.csv``. Fragments for which ``has_aromatic`` is
        true but no rule produced a product are written to ``not_changed.csv``.
        Both output files are created in the current working directory.
        """
        fieldnames = ['SMILES', 'MUTATED', 'REACTION', 'DESCRIPTION']
        fragment_library = os.path.join(os.path.dirname(__file__), 'test_data',
                                        'fragments.csv')

        # Context managers guarantee the three files are flushed and closed
        # even when an assertion fails part-way through.
        with open('hetero_atom_mutations.csv', 'w') as mutations_file, \
                open('not_changed.csv', 'w') as notchanged_file, \
                open(fragment_library) as fragments_file:
            writer = csv.DictWriter(mutations_file, fieldnames)
            writer.writeheader()

            notchanged = csv.DictWriter(notchanged_file, ['SMILES', 'TITLE'])
            notchanged.writeheader()

            for frag_row in csv.DictReader(fragments_file):
                frag_smiles = frag_row['SMILES']
                rdkit_mol = Chem.MolFromSmiles(frag_smiles)
                assert rdkit_mol is not None, (
                    'could not parse fragment %s' % frag_smiles)
                orig_can_smi = Chem.MolToSmiles(rdkit_mol,
                                                isomericSmiles=True)

                changed = False

                for src, rxn in zip(GetHeterocycleReactionSmarts(),
                                    GetHeterocycleReactions()):
                    for mutated in get_unique_products(rxn, rdkit_mol):
                        assert_valid_change(orig_can_smi, mutated)
                        writer.writerow({
                            'SMILES': orig_can_smi,
                            'MUTATED': mutated,
                            'REACTION': src.SMARTS + '>>' + src.CONVERT_TO,
                            'DESCRIPTION': src.DESCRIPTION,
                        })
                        changed = True

                # record aromatic fragments that no rule changes
                # (possible problems?)
                if not changed and has_aromatic(rdkit_mol):
                    notchanged.writerow({
                        'SMILES': orig_can_smi,
                        'TITLE': orig_can_smi,
                    })
Пример #6
0
    def test_fuzz_atom_mutations(self):
        """Repeatedly mutate randomly chosen fragments with every rule.

        Seeds a work list with the canonical SMILES of every fragment in
        ``test_data/fragments.csv`` for which ``has_aromatic`` is true. For up
        to ``num_trials`` iterations a fragment is popped at random (fixed
        seed, so runs are repeatable) and every reaction from
        ``GetHeterocycleReactions()`` is applied to it. Each product is checked
        with ``assert_valid_change``; products not seen before are appended to
        the work list and written to the mutations CSV. Fragments that no rule
        changed are written once to the not-changed CSV. Both CSV files are
        created in the current working directory.
        """
        fragment_library = os.path.join(os.path.dirname(__file__), 'test_data',
                                        'fragments.csv')
        base = os.path.splitext(os.path.basename(fragment_library))[0]

        rand = Random(0xDEADBEEF)
        uniq_fragments = set()
        fragments = []

        with open(fragment_library) as fragments_file:
            for frag_row in csv.DictReader(fragments_file):
                frag_smiles = frag_row['SMILES']
                rdkit_mol = Chem.MolFromSmiles(frag_smiles)
                assert rdkit_mol is not None, (
                    'could not parse fragment %s' % frag_smiles)
                if not has_aromatic(rdkit_mol):
                    continue
                orig_can_smi = Chem.MolToSmiles(rdkit_mol,
                                                isomericSmiles=True)
                # The library is expected to hold no duplicate fragments.
                assert orig_can_smi not in uniq_fragments
                uniq_fragments.add(orig_can_smi)
                fragments.append(orig_can_smi)

        print(len(fragments), "fragments with aromaticity")

        fieldnames = ['SMILES', 'MUTATED', 'REACTION', 'DESCRIPTION']
        # NOTE(review): the library base name is prepended with no separator
        # (e.g. 'fragmentshetero_atom_mutations_fuzzing.csv') - confirm that
        # this is the intended file name.
        mutations_path = base + 'hetero_atom_mutations_fuzzing.csv'
        notchanged_path = base + 'not_changed_during_fuzzing.csv'
        uniq_notchanged = set()

        # Raise to 1000000 to test the full range of possible fragments.
        num_trials = 1000

        total_generated = 0
        # Context managers guarantee both outputs are flushed and closed even
        # when an assertion fails part-way through.
        with open(mutations_path, 'w') as mutations_file, \
                open(notchanged_path, 'w') as notchanged_file:
            writer = csv.DictWriter(mutations_file, fieldnames)
            writer.writeheader()
            notchanged = csv.DictWriter(notchanged_file, ['SMILES', 'TITLE'])
            notchanged.writeheader()

            for i in range(num_trials):
                if i and i % 1000 == 0:
                    print(i)
                if not fragments:
                    print("Converged! No more fragments left!")
                    break
                idx = rand.randint(0, len(fragments) - 1)
                orig_can_smi = fragments.pop(idx)
                rdkit_mol = Chem.MolFromSmiles(orig_can_smi)

                changed = False

                for src, rxn in zip(GetHeterocycleReactionSmarts(),
                                    GetHeterocycleReactions()):
                    for mutated in get_unique_products(rxn, rdkit_mol):
                        total_generated += 1
                        changed = True
                        assert_valid_change(orig_can_smi, mutated)
                        # Only products not seen before are queued and logged.
                        if mutated in uniq_fragments:
                            continue
                        uniq_fragments.add(mutated)
                        fragments.append(mutated)

                        writer.writerow({
                            'SMILES': orig_can_smi,
                            'MUTATED': mutated,
                            'REACTION': src.SMARTS + '>>' + src.CONVERT_TO,
                            'DESCRIPTION': src.DESCRIPTION,
                        })

                # record aromatic fragments that no rule changes
                # (possible problems?)
                if not changed and orig_can_smi not in uniq_notchanged:
                    uniq_notchanged.add(orig_can_smi)
                    notchanged.writerow({
                        'SMILES': orig_can_smi,
                        'TITLE': orig_can_smi,
                    })

        print(total_generated, "generated of which", len(uniq_fragments),
              "are unique fragments generated after", num_trials, "trials")