def select_torsions(molecules_list_dict, molecule_attributes, forcefield, target_coverage=3):
    """Select torsions molecule by molecule until each SMIRKS reaches its target count.

    Molecules are visited in the iteration order of ``molecule_attributes``. For each
    one, the torsions returned by ``smirnoff_analyze_torsions`` are walked, and a torsion
    is selected only when all of the following hold:

    * the id of its parameter is in the module-level ``list_of_tids``;
    * fewer than ``target_coverage`` torsions have been selected so far for the
      parameter's SMIRKS (the count is shared across all molecules);
    * its two central atoms have no ring in common;
    * no torsion around the same central bond was already selected for this molecule.

    Progress lines and a final per-parameter coverage table are printed to stdout.

    Parameters
    ----------
    molecules_list_dict : mapping
        molecule index -> sequence of molecule records; the first record is handed to
        ``cmiles.utils.load_molecule`` and the whole sequence is stored in the result.
    molecule_attributes : mapping
        molecule index -> attribute dict, which must contain the key
        ``'canonical_isomeric_explicit_hydrogen_mapped_smiles'``.
    forcefield
        Passed to ``smirnoff_analyze_torsions``; must also provide
        ``get_parameter_handler('ProperTorsions').parameters``.
    target_coverage : int
        Number of selected torsions per SMIRKS after which further matches are skipped.

    Returns
    -------
    dict
        Keyed by the label from ``cmiles.utils.to_canonical_label``; each value is a dict
        with the keys ``'initial_molecules'``, ``'atom_indices'``, ``'attributes'``, ``'tid'``.
    """
    selected = {}
    per_smirks = Counter()
    for position, (mol_index, mol_attr) in enumerate(molecule_attributes.items()):
        print(f'{position:<7d}: {mol_index}')
        mapped_smiles = mol_attr['canonical_isomeric_explicit_hydrogen_mapped_smiles']
        # Round trip QCFractal molecule -> OpenEye molecule -> Off Molecule;
        # needed for now so that atom indices stay consistent.
        oemol = cmiles.utils.load_molecule(molecules_list_dict[mol_index][0])
        graph = BondGraph([(bond.GetBgnIdx(), bond.GetEndIdx()) for bond in oemol.GetBonds()])
        # atom index -> positions (within the ring list) of the rings containing that atom
        ring_ids = defaultdict(set)
        for ring_no, ring_atoms in enumerate(graph.get_rings()):
            for atom_idx in ring_atoms:
                ring_ids[atom_idx].add(ring_no)
        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        coverage = smirnoff_analyze_torsions(forcefield, off_mol)
        # Central bonds (as unordered atom pairs) already taken for this molecule
        used_central_bonds = set()
        for torsion_param, matches in coverage.items():
            smirks = torsion_param.smirks
            for atom_indices in matches:
                below_target = per_smirks[smirks] < target_coverage
                if torsion_param.id not in list_of_tids:
                    continue
                if not below_target:
                    print(f"  - torsion {atom_indices} skipped because {smirks} have {per_smirks[smirks]} already")
                    continue
                _, j, k, _ = atom_indices
                if ring_ids[j] & ring_ids[k]:
                    # central atoms share a ring: not a torsion we want to select
                    continue
                central_bond = frozenset((j, k))
                if central_bond in used_central_bonds:
                    continue
                per_smirks[smirks] += 1
                label = cmiles.utils.to_canonical_label(mapped_smiles, atom_indices)
                selected[label] = {
                    'initial_molecules': molecules_list_dict[mol_index],
                    'atom_indices': [atom_indices],
                    'attributes': mol_attr,
                    'tid': torsion_param.id,
                }
                used_central_bonds.add(central_bond)
                print(f"  - torsion {atom_indices} added for smirks {smirks}")

    rule = '-' * 90
    print("\n## Selected Torsion Coverage ##\n" + rule)
    n_covered = 0
    for param in forcefield.get_parameter_handler('ProperTorsions').parameters:
        if param.id not in list_of_tids:
            continue
        count = per_smirks[param.smirks]
        print(f"{param.id:5s}{param.smirks:80s} : {count:7d}")
        if count > 0:
            n_covered += 1
    print(rule)
    print(f'{n_covered} / {len(list_of_tids)} torsion SMIRKs covered')
    return selected
Exemplo n.º 2
0
 def find_best_dihedral_same_center_bond(self, dihedral_candidates):
     """ Pick one dihedral out of candidates that all share the same center bond

     Ranking of a dihedral i-j-k-l (from Lee-Ping):
     A bond graph is built from the bonds whose two atoms are both non-hydrogen.
     All candidate i-j bonds are removed from it, and each i is scored by the size of
     get_connected_nodes(i); the i-j bonds are then put back.
     The same is done with the k-l bonds to score each l.
     When there is only one distinct i (or l), its score is 0 and nothing is removed.
     The candidate with the largest connected_i + connected_l wins; on a tie the
     first such candidate in iteration order is returned.

     Returns None if dihedral_candidates is empty, otherwise one of its elements.
     Raises AssertionError if the candidates do not all share the same j and k.
     """
     if len(dihedral_candidates) == 0:
         return None
     # every candidate must sit on the same center bond as the first one
     _, center_j, center_k, _ = next(iter(dihedral_candidates))
     assert all((j, k) == (center_j, center_k) for _, j, k, _ in dihedral_candidates), "all candidates should share same center"

     def is_heavy(atom_idx):
         return self.m.elem[atom_idx] != 'H'

     # scratch graph restricted to bonds between heavy atoms
     heavy_graph = BondGraph([[a, b] for a, b in self.m.bonds if is_heavy(a) and is_heavy(b)])

     # --- score every distinct i ---
     i_atoms = {first for first, _, _, _ in dihedral_candidates}
     if len(i_atoms) == 1:
         (only_i,) = i_atoms
         n_connected_i = {only_i: 0}
     else:
         # cut all i-j bonds before measuring
         for atom in i_atoms:
             heavy_graph.remove_bond(atom, center_j)
         n_connected_i = {atom: len(heavy_graph.get_connected_nodes(atom)) for atom in i_atoms}
         print(f"n_connected for each i: {n_connected_i}")
         # restore the i-j bonds so the l side is measured with them in place
         for atom in i_atoms:
             heavy_graph.add_bond(atom, center_j)

     # --- score every distinct l ---
     l_atoms = {last for _, _, _, last in dihedral_candidates}
     if len(l_atoms) == 1:
         (only_l,) = l_atoms
         n_connected_l = {only_l: 0}
     else:
         # cut all k-l bonds; they are not restored because the scratch graph is discarded
         for atom in l_atoms:
             heavy_graph.remove_bond(center_k, atom)
         n_connected_l = {atom: len(heavy_graph.get_connected_nodes(atom)) for atom in l_atoms}
         print(f"n_connected for each l: {n_connected_l}")

     def score(dihedral):
         return n_connected_i[dihedral[0]] + n_connected_l[dihedral[3]]

     return max(dihedral_candidates, key=score)
Exemplo n.º 3
0
 def __init__(self, molecule, skip_straight=True):
     """ Store the molecule, build its bond graph and prepare the set of angles to avoid

     molecule: object exposing a `bonds` attribute; kept as self.m
     skip_straight: when true, self.avoid_angles_set is taken from self.get_straight_angles();
         otherwise it is left as an empty set
     """
     self.m = molecule
     self.bond_graph = BondGraph(self.m.bonds)
     # default: nothing to avoid
     self.avoid_angles_set = set()
     if not skip_straight:
         return
     self.avoid_angles_set = self.get_straight_angles()
def filter_torsions_coverage(torsions_coverage, oemol):
    """Filter torsion matches by ring membership and hydrogen placement.

    The SMIRKS of every torsion parameter is inspected once to decide

    1. whether its central (atom 2 - atom 3) bond is written as one of the explicit
       ring/aromatic bond forms ``-;@``, ``-@``, ``:``, ``=,:`` or ``@``; and
    2. how many of its terminal atoms (map indices 1 and 4) are written as
       non-negated hydrogen (``#1``).

    A matched index tuple is kept only if both checks pass:

    * hydrogen check
        - no terminal hydrogen in the SMIRKS: none of the four matched atoms is hydrogen;
        - one terminal hydrogen: exactly one of the four matched atoms is hydrogen;
        - two terminal hydrogens: neither of the two central matched atoms is hydrogen;
    * ring check: unless the SMIRKS central bond is one of the explicit ring forms
      above, the two central matched atoms must not share a ring.

    Parameters
    ----------
    torsions_coverage : mapping
        torsion parameter (object with ``.smirks`` and ``.id``) -> iterable of matched
        atom index tuples ``(i, j, k, l)``.
    oemol
        Molecule providing ``GetBonds()`` (bonds with ``GetBgnIdx()`` / ``GetEndIdx()``)
        and ``GetAtoms()`` (atoms with ``GetAtomicNum()``).

    Returns
    -------
    collections.defaultdict(list)
        parameter id -> list of the index tuples that passed both checks. Parameters
        without any surviving match do not appear as keys.

    Raises
    ------
    IndexError
        If a SMIRKS does not contain the text ``:2]``.
    """
    # Collect useful information using BondGraph
    bond_graph = BondGraph(
        [(bond.GetBgnIdx(), bond.GetEndIdx()) for bond in oemol.GetBonds()]
    )
    # atom index -> positions (within the ring list) of the rings containing that atom
    d_rings = defaultdict(set)
    for i_ring, ring in enumerate(bond_graph.get_rings()):
        for atom_idx in ring:
            d_rings[atom_idx].add(i_ring)
    # Atomic numbers, looked up below by atom index
    # (assumes GetAtoms() yields atoms in index order -- TODO confirm)
    elem_list = [atom.GetAtomicNum() for atom in oemol.GetAtoms()]

    # Raw-string patterns, compiled once: the original non-raw literals contained
    # invalid escape sequences such as "\(" which newer Python versions warn about.
    # NOTE(review): ".*" is greedy, so a SMIRKS with parenthesised branches after atom 2
    # AND after a later atom loses everything in between -- confirm no such pattern is used.
    branch_after_atom2 = re.compile(r':2](\(.*\))?')
    atom2_end = re.compile(r':2\]')
    atom3 = re.compile(r'\[.*:3\]')
    hydrogen_at_1 = re.compile(r"[^!]#1:1")
    hydrogen_at_4 = re.compile(r"[^!]#1:4")
    in_ring_bonds = ('-;@', '-@', ':', '=,:', '@')

    # Filter out (1) unwanted in-ring rotations (2) terminal H when terminal is not specified
    filtered_torsions_coverage = defaultdict(list)
    for torsion_param, indices_list in torsions_coverage.items():
        smirks = torsion_param.smirks

        # Isolate the bond token written between atom 2 and atom 3 of the SMIRKS
        smirks_mod = branch_after_atom2.sub(':2]', smirks)
        smirks_chopped = atom2_end.split(smirks_mod)[1]
        central_bond = atom3.split(smirks_chopped)[0]
        rotatable_bond = central_bond not in in_ring_bonds

        # Each terminal position written as explicit hydrogen lowers the heavy-atom count
        heavy_atoms = 4
        if hydrogen_at_1.search(smirks):
            heavy_atoms -= 1
        if hydrogen_at_4.search(smirks):
            heavy_atoms -= 1

        # validation for each indices
        for indices in indices_list:
            check_elem = [elem_list[idx] for idx in indices]
            if heavy_atoms == 4:
                hydrogens_ok = 1 not in check_elem
            elif heavy_atoms == 3:
                hydrogens_ok = check_elem.count(1) == 1
            else:
                # both terminals may be hydrogen; only the central pair must be heavy
                hydrogens_ok = 1 not in check_elem[1:3]

            if rotatable_bond:
                _, j, k, _ = indices
                if d_rings[j] & d_rings[k]:
                    # central atoms share a ring although the SMIRKS did not ask for one
                    continue

            if hydrogens_ok:
                filtered_torsions_coverage[torsion_param.id].append(indices)

    return filtered_torsions_coverage
Exemplo n.º 5
0
#!/usr/bin/env python

import os
from itertools import combinations
from forcebalance.molecule import Molecule
from bond_graph import BondGraph

# Scan every file in the molecule folder and collect those in which some pair of
# rings is joined by exactly one path of three atoms.
mol_folder = 'processed_molecules/mol2'
total_count = 0  # number of files looked at
mol_with_bridges = []
for total_count, filename in enumerate(sorted(os.listdir(mol_folder)), start=1):
    molecule = Molecule(os.path.join(mol_folder, filename))
    graph = BondGraph(molecule.bonds)
    rings = graph.get_rings()
    if len(rings) < 2:
        # a bridge needs at least two rings
        continue
    ring_sets = [set(ring) for ring in rings]
    for ring_a, ring_b in combinations(ring_sets, 2):
        # all paths between the two rings
        paths = graph.find_all_paths(ring_a, ring_b)
        # wanted: a single path, of length 3 (exactly one bridge atom)
        if len(paths) == 1 and len(paths[0]) == 3:
            mol_with_bridges.append(filename)
            break


for filename in mol_with_bridges:
    print(filename)