def convert_dict_to_mols(tot_dict):
    """
    Build one bond-less, carbon-only molecule per entry of ``tot_dict``.

    :param tot_dict: dict mapping a name (a SMILES string, judging by the
        original loop variable) to a sequence of ``(x, y, z)`` coordinates
    :return: list of molecules in the dict's iteration order; each molecule
        has one carbon atom per coordinate, placed at that coordinate, and
        carries the dict key in its ``_Name`` property
    """
    molecules = []
    for name, coords in tot_dict.items():
        print(coords)
        editable = RWMol()
        # Every coordinate becomes a carbon atom (atomic number 6).
        for _ in coords:
            editable.AddAtom(Atom(6))
        built = editable.GetMol()
        # The embedding only provides a conformer; every position in it is
        # overwritten with the supplied coordinates just below.
        AllChem.EmbedMolecule(built)
        conformer = built.GetConformer()
        for atom_idx, xyz in enumerate(coords):
            conformer.SetAtomPosition(atom_idx,
                                      Point3D(xyz[0], xyz[1], xyz[2]))
        built = conformer.GetOwningMol()
        built.SetProp("_Name", name)
        molecules.append(built)
    return molecules
def posthoc_refine(self, scaffold):
    """
    Averages the overlapping atoms.

    For every hit in ``self.hits`` the mapping returned by
    ``self.get_positional_mapping(scaffold, hit)`` (scaffold atom index ->
    hit atom index) is used to collect the hit coordinates that correspond
    to each scaffold atom. Each scaffold atom with at least one such
    coordinate is moved to their mean; atoms without any are left in place.

    :param scaffold: molecule with a conformer; it is copied, not modified
    :return: the refined copy (a ``Chem.RWMol``)
    """
    refined = Chem.RWMol(scaffold)
    refconf = refined.GetConformer()
    positions = defaultdict(list)
    for h in self.hits:
        hc = h.GetConformer()
        for k, v in self.get_positional_mapping(scaffold, h).items():
            pos = hc.GetAtomPosition(v)  # fetched once, not once per axis
            positions[k].append([pos.x, pos.y, pos.z])
    for i in range(scaffold.GetNumAtoms()):
        coords = positions.get(i)
        if not coords:
            # No hit maps onto this atom. The original had a warning here,
            # but it sat after ``continue`` and could never run (and its
            # message interpolated ``GetSymbol`` without calling it).
            continue
        p = np.mean(np.array(coords), axis=0).astype(float)
        refconf.SetAtomPosition(i, Point3D(p[0], p[1], p[2]))
    # Best effort: catchErrors=True means sanitization failures do not raise.
    Chem.SanitizeMol(
        refined,
        sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
        Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
        catchErrors=True)
    return refined
def test2_1_GetDonor2FeatVects(self):
    '''Case 2.1: one hydrogen with sp2 arrangement

    Atom 1 is the donor and atom 8 its hydrogen; the single feature vector
    must start at the donor, point along the donor->hydrogen direction and
    have the requested length.
    '''
    conformer = self.mol.GetConformer(-1)
    vectors = GetDonor2FeatVects(conformer, [1], scale=1.5)[0]
    donor_pos = conformer.GetAtomPosition(1)

    # Exactly one vector is expected.
    self.assertEqual(len(vectors), 1, 'Incorrect number of vectors')

    # The vector starts at the donor atom.
    self.assertListAlmostEqual(vectors[0][0], donor_pos,
                               'Incorrect starting point of vector')

    # Parallel to the donor->hydrogen vector (zero cross product) and
    # pointing the same way (positive dot product).
    to_hydrogen = conformer.GetAtomPosition(8) - (donor_pos)
    direction = vectors[0][1] - vectors[0][0]
    self.assertListAlmostEqual(direction.CrossProduct(to_hydrogen),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector')
    self.assertTrue(direction.DotProduct(to_hydrogen) > 0,
                    'Incorrect direction of vector')

    # The length equals the requested scale.
    self.assertAlmostEqual(direction.Length(), 1.5,
                           msg='Incorrect length of vector')
def test3_GetDonor2FeatVects(self):
    '''Case 3: no hydrogens

    Atom 3 is the donor with neighbours 2 and 4; the single feature vector
    must start at the donor and point opposite to the sum of the unit
    vectors towards the two neighbours.
    '''
    conformer = self.mol.GetConformer(-1)
    vectors = GetDonor2FeatVects(conformer, [3], scale=1.5)[0]
    donor_pos = conformer.GetAtomPosition(3)

    # Exactly one vector is expected.
    self.assertEqual(len(vectors), 1, 'Incorrect number of vectors')

    # The vector starts at the donor atom.
    self.assertListAlmostEqual(vectors[0][0], donor_pos,
                               'Incorrect starting point of vector')

    # Sum of the normalized donor->neighbour vectors.
    neighbour_units = []
    for neighbour_idx in (2, 4):
        unit = conformer.GetAtomPosition(neighbour_idx) - donor_pos
        unit.Normalize()
        neighbour_units.append(unit)
    neighbour_sum = (neighbour_units[0] + neighbour_units[1])

    # Collinear with that sum (zero cross product) but pointing away from
    # the neighbours (negative dot product).
    direction = vectors[0][1] - vectors[0][0]
    self.assertListAlmostEqual(direction.CrossProduct(neighbour_sum),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector')
    self.assertTrue(direction.DotProduct(neighbour_sum) < 0,
                    'Incorrect direction of vector')

    # The length equals the requested scale.
    self.assertAlmostEqual(direction.Length(), 1.5,
                           msg='Incorrect length of vector')
def _place_between(self, mol: Chem.RWMol, a: int, b: int, aromatic=True):
    """
    Insert a carbon atom at the midpoint of the bond between atoms a and b.

    The a-b bond is removed and replaced by a-new and b-new bonds. The new
    atom and both original atoms are tagged with the boolean property
    ``_Novel``. ``mol`` is modified in place.

    :param mol: editable molecule with a conformer
    :param a: index of the first bonded atom
    :param b: index of the second bonded atom
    :param aromatic: if truthy the new bonds are aromatic and all three
        atoms are flagged aromatic; otherwise the new bonds copy the type
        of the original a-b bond
    :return: None (also when there is no bond between a and b, in which
        case 'FAIL' is printed and nothing is changed)
    """
    bond_ab = mol.GetBondBetweenAtoms(a, b)
    if bond_ab is None:
        print('FAIL')
        return None  # fail
    bond_type = Chem.BondType.AROMATIC if aromatic else bond_ab.GetBondType()

    new_idx = mol.AddAtom(Chem.Atom(6))
    involved = [mol.GetAtomWithIdx(index) for index in (new_idx, a, b)]
    if aromatic:
        for atom in involved:
            atom.SetIsAromatic(True)
    # prevent constraints
    for atom in involved:
        atom.SetBoolProp('_Novel', True)

    # fix position: midpoint of a and b
    conformer = mol.GetConformer()
    first = conformer.GetAtomPosition(a)
    second = conformer.GetAtomPosition(b)
    midpoint = Point3D(first.x / 2 + second.x / 2,
                       first.y / 2 + second.y / 2,
                       first.z / 2 + second.z / 2)
    conformer.SetAtomPosition(new_idx, midpoint)

    # fix bonds: a-b becomes a-new-b
    mol.RemoveBond(a, b)
    for end in (a, b):
        mol.AddBond(end, new_idx, bond_type)
def test2_2_GetDonor2FeatVects(self):
    #Case 2.2: one hydrogen with sp3 arrangement
    # Atom 4 is the donor, atom 11 its hydrogen, atoms 3 and 5 its heavy
    # neighbours. Two vectors are expected: one along donor->hydrogen and
    # one opposite to the sum of the unit vectors towards the neighbours.
    conformer = self.mol.GetConformer(-1)
    vectors = GetDonor2FeatVects(conformer, [4], scale=1.5)[0]
    donor_pos = conformer.GetAtomPosition(4)

    # Two vectors are expected.
    self.assertEqual(len(vectors), 2, 'Incorrect number of vectors')

    # Both start at the donor atom.
    self.assertListAlmostEqual(vectors[0][0], donor_pos,
                               'Incorrect starting point of vector 1')
    self.assertListAlmostEqual(vectors[1][0], donor_pos,
                               'Incorrect starting point of vector 2')

    # Reference directions.
    to_hydrogen = conformer.GetAtomPosition(11) - donor_pos
    neighbour_units = []
    for neighbour_idx in (3, 5):
        unit = conformer.GetAtomPosition(neighbour_idx) - donor_pos
        unit.Normalize()
        neighbour_units.append(unit)
    neighbour_sum = (neighbour_units[0] + neighbour_units[1])
    first = vectors[0][1] - vectors[0][0]
    second = vectors[1][1] - vectors[1][0]

    # Vector 1: parallel to, and in the direction of, the hydrogen.
    self.assertListAlmostEqual(first.CrossProduct(to_hydrogen),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector 1')
    self.assertTrue(first.DotProduct(to_hydrogen) > 0,
                    'Incorrect direction of vector 1')

    # Vector 2: collinear with the neighbour sum but pointing away from it.
    self.assertListAlmostEqual(second.CrossProduct(neighbour_sum),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector 2')
    self.assertTrue(second.DotProduct(neighbour_sum) < 0,
                    'Incorrect direction of vector 2')

    # Both lengths equal the requested scale.
    self.assertAlmostEqual(first.Length(), 1.5,
                           msg='Incorrect length of vector 1')
    self.assertAlmostEqual(second.Length(), 1.5,
                           msg='Incorrect length of vector 2')
def RDMolFromGraphs(node_list, adjacency_matrix, xyz_coord, remap=None):
    """
    Build an RDKit molecule (and its SMILES) from graph-style arrays.

    :param node_list: per-atom values passed to ``Chem.Atom``
    :param adjacency_matrix: square array; only non-zero entries strictly
        above the diagonal are read, each one a bond order (1, 1.5, 2 or 3)
        or, when ``remap`` is given, a key into ``remap``
    :param xyz_coord: array indexed as ``xyz_coord[i, 0..2]`` holding the
        coordinates of atom ``i``
    :param remap: optional lookup applied to every matrix entry to obtain
        the bond order
    :return: ``(smiles, mol)``; ``mol`` is kekulized, sanitized and carries
        one conformer with the supplied coordinates
    :raises ValueError: if a bond order is not 1, 1.5, 2 or 3
    """
    # Checked in this order with ``==``; there are many more RDKit bond
    # types than the ones handled here.
    bond_types = (
        (1., Chem.rdchem.BondType.SINGLE),
        (1.5, Chem.rdchem.BondType.AROMATIC),
        (2., Chem.rdchem.BondType.DOUBLE),
        (3., Chem.rdchem.BondType.TRIPLE),
    )

    editable = Chem.RWMol()
    conformer = Chem.Conformer(len(node_list))

    # Add the atoms, remembering which molecule index each node received.
    atom_index = {}
    for node, element in enumerate(node_list):
        new_atom = Chem.Atom(element)
        conformer.SetAtomPosition(
            node,
            Point3D(xyz_coord[node, 0], xyz_coord[node, 1],
                    xyz_coord[node, 2]))
        atom_index[node] = editable.AddAtom(new_atom)

    # Add one bond per non-zero entry of the strict upper triangle.
    rows, cols = np.where(np.triu(adjacency_matrix, k=1))
    orders = adjacency_matrix[rows, cols]
    for row, col, order in zip(rows, cols, orders):
        if remap is not None:
            order = remap[order]
        for value, bond_type in bond_types:
            if order == value:
                editable.AddBond(atom_index[row], atom_index[col], bond_type)
                break
        else:
            raise ValueError("Invalid bond type in matrix")

    # Freeze the editable molecule, then clean it up.
    mol = editable.GetMol()
    Chem.Kekulize(mol)
    Chem.SanitizeMol(mol)
    smiles = Chem.MolToSmiles(mol, kekuleSmiles=False)

    # Attach the 3D coordinates last.
    mol.AddConformer(conformer, assignId=True)
    return smiles, mol
def mol_from_json(symbols, connectivity, geometry, permute_xyz=False):
    """
    Generate an RDKit molecule from QCSchema-style molecule fields.

    Parameters
    ----------
    symbols : sequence
        One entry per atom. Each entry is looked up in the module-level
        ``_symbols`` table and the result is passed to ``Chem.Atom``.
    connectivity : iterable
        One sequence per bond: the first two items are atom indices and the
        last item is the bond order (1, 2 or 3).
    geometry : numpy.ndarray
        Atom coordinates, either flat (length 3N) or already shaped (N, 3).
        Required: stereochemistry is assigned from these coordinates.
    permute_xyz : bool, optional
        When False (default) the molecule is tagged with the property
        ``_json_geometry`` set to ``'1'`` ("keep current order").

    Returns
    -------
    molecule : rdkit.Chem.Mol
        Sanitized molecule with one conformer holding ``geometry``.

    Raises
    ------
    RuntimeError
        If RDKit cannot sanitize the molecule; the underlying exception is
        chained as ``__cause__``.
    KeyError
        If a bond order is not 1, 2 or 3.
    """
    from rdkit.Geometry.rdGeometry import Point3D
    from rdkit.Chem import rdmolops

    _BO_DISPATCH_TABLE = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE
    }

    # -1 lets numpy infer the atom count, so both a flat array and an
    # (N, 3) array are accepted.
    geometry = geometry.reshape(-1, 3)
    conformer = Chem.Conformer(len(symbols))

    em = Chem.RWMol(Chem.Mol())
    for i, s in enumerate(symbols):
        atom_idx = em.AddAtom(Chem.Atom(_symbols[s]))
        x, y, z = geometry[i][0], geometry[i][1], geometry[i][2]
        conformer.SetAtomPosition(atom_idx, Point3D(x, y, z))

    # Add connectivity
    for bond in connectivity:
        em.AddBond(bond[0], bond[1], _BO_DISPATCH_TABLE[bond[-1]])

    molecule = em.GetMol()
    try:
        Chem.SanitizeMol(molecule)
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not converted into a RuntimeError.
        raise RuntimeError("Could not sanitize molecule") from err

    # Add coordinates and assign stereochemistry from them
    initial_conformer_id = molecule.AddConformer(conformer, assignId=True)
    rdmolops.AssignStereochemistryFrom3D(molecule,
                                         confId=initial_conformer_id,
                                         replaceExistingTags=True)
    if not permute_xyz:
        # Add a tag to keep current order
        molecule.SetProp("_json_geometry", '1')
    return molecule
def coordinates(self, positions):
    """
    Overwrite the atom positions of the conformer of ``self._mol``.

    Parameter
    ---------
    positions : numpy array [natoms, 3]
        Row ``i`` becomes the position of atom ``i``; values are converted
        to float64 first.
    """
    conformer = self._mol.GetConformer()
    for atom_idx, row in enumerate(positions.astype(np.float64)):
        conformer.SetAtomPosition(atom_idx, Point3D(row[0], row[1], row[2]))
def posthoc_refine(self, scaffold, indices: Optional[List[int]] = None) -> Chem.Mol:
    """
    Averages the overlapping atoms.

    For every hit in ``self.hits`` whose ``_Name`` is not in
    ``self.unmatched``, the mapping returned by
    ``self.get_positional_mapping(scaffold, hit)`` (scaffold atom index ->
    hit atom index) is used to collect the matching hit coordinates per
    scaffold atom. Each selected scaffold atom then receives:

    * ``_Origin``: JSON list of ``"<hit name>.<hit atom index>"`` strings,
      or ``'none'`` when no hit maps onto it;
    * ``_Stdev`` / ``_Max``: standard deviation / maximum of the distances
      between the mean position and the contributing positions (0. when
      there are none).

    If ``self.average_position`` is truthy, atoms with contributing
    positions are also moved to the mean position.

    :param scaffold: molecule with a conformer; it is copied, not modified
    :param indices: scaffold atom indices to process (default: all atoms)
    :return: the refined copy
    """
    n_atoms = scaffold.GetNumAtoms()
    # A set makes the per-atom membership test below O(1) instead of a
    # scan of the list for every atom.
    selected = set(range(n_atoms)) if indices is None else set(indices)
    refined = Chem.RWMol(scaffold)
    refconf = refined.GetConformer()
    positions = defaultdict(list)  # coordinates
    equivalence = defaultdict(list)  # atom indices of hits.
    for h in self.hits:
        hit_name = h.GetProp('_Name')
        if hit_name in self.unmatched:
            continue
        hc = h.GetConformer()
        for k, v in self.get_positional_mapping(scaffold, h).items():
            pos = hc.GetAtomPosition(v)  # fetched once, not once per axis
            positions[k].append([pos.x, pos.y, pos.z])
            equivalence[k].append(f'{hit_name}.{v}')
    for i in range(n_atoms):
        if i not in selected:
            continue
        atom = refined.GetAtomWithIdx(i)
        coords = positions.get(i)
        if not coords:
            # No hit maps onto this scaffold atom.
            atom.SetDoubleProp('_Stdev', 0.)
            atom.SetDoubleProp('_Max', 0.)
            atom.SetProp('_Origin', 'none')
            continue
        p = np.mean(np.array(coords), axis=0).astype(float)
        # Spread is measured as distance from the mean position.
        ds = [np.linalg.norm(p - pi) for pi in coords]
        sd = np.std(ds)
        md = np.max(ds)
        atom.SetProp('_Origin', json.dumps(equivalence[i]))
        atom.SetDoubleProp('_Stdev', sd)
        atom.SetDoubleProp('_Max', md)
        if self.average_position:
            refconf.SetAtomPosition(i, Point3D(p[0], p[1], p[2]))
    # Best effort: catchErrors=True means sanitization failures do not raise.
    Chem.SanitizeMol(
        refined,
        sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
        Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
        catchErrors=True)
    return refined
def test1_GetDonor2FeatVects(self):
    '''Case 1: two hydrogens

    Atom 2 is the donor and atoms 9 and 10 its hydrogens; one feature
    vector is expected per hydrogen, each starting at the donor, pointing
    along donor->hydrogen and having the requested length.
    '''
    conformer = self.mol.GetConformer(-1)
    vectors = GetDonor2FeatVects(conformer, [2], scale=1.5)[0]
    donor_pos = conformer.GetAtomPosition(2)

    # Two vectors are expected.
    self.assertEqual(len(vectors), 2, 'Incorrect number of vectors')

    # Both start at the donor atom.
    self.assertListAlmostEqual(vectors[0][0], donor_pos,
                               'Incorrect starting point of vector 1')
    self.assertListAlmostEqual(vectors[1][0], donor_pos,
                               'Incorrect starting point of vector 2')

    # Reference directions and the vectors under test.
    to_hydrogen_1 = conformer.GetAtomPosition(9) - donor_pos
    to_hydrogen_2 = conformer.GetAtomPosition(10) - donor_pos
    first = vectors[0][1] - vectors[0][0]
    second = vectors[1][1] - vectors[1][0]

    # Each vector is parallel to (zero cross product) and points the same
    # way as (positive dot product) its donor->hydrogen vector.
    self.assertListAlmostEqual(first.CrossProduct(to_hydrogen_1),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector 1')
    self.assertTrue(first.DotProduct(to_hydrogen_1) > 0,
                    'Incorrect direction of vector 1')
    self.assertListAlmostEqual(second.CrossProduct(to_hydrogen_2),
                               Point3D(0, 0, 0),
                               'Incorrect direction of vector 2')
    self.assertTrue(second.DotProduct(to_hydrogen_2) > 0,
                    'Incorrect direction of vector 2')

    # Both lengths equal the requested scale.
    self.assertAlmostEqual(first.Length(), 1.5,
                           msg='Incorrect length of vector 1')
    self.assertAlmostEqual(second.Length(), 1.5,
                           msg='Incorrect length of vector 2')
def estimate_geometry_of_mol(rdmol):
    '''Wrapper to estimate the geometry of general molecule using rdkit.

    The method is mainly meant for dealing with dummy atoms and metals in
    the molecule. Atoms with more than three bonds and an atomic number
    above 10 are taken out, the remainder is embedded with RDKit, and the
    removed atoms are then put back at the centroid of their former
    neighbours and connected to each of them with a dative bond.

    Notes:
      * The input molecule is copied; only its property cache is updated.
      * Restored atoms are appended, so they end up at the highest indices
        of the returned molecule rather than at their original index.
      * When several fragments result, each one is translated by a random
        offset (``np.random``), so the output is not deterministic.

    :param rdmol: RDKit molecule
    :return: editable copy (``RWMol``) with one conformer
    :raises ValueError: if RDKit fails to embed the stripped molecule
    '''
    # TODO: generalize method for metals / dummy atoms
    rdmol.UpdatePropertyCache(strict=False)
    rdmol = Chem.rdchem.RWMol(rdmol)

    # Record everything needed about the atoms to strip *before* removing
    # anything: removal renumbers the remaining atoms and invalidates Atom
    # objects, so only plain indices and detached copies are kept.
    detached = []  # (detached atom copy, original neighbour indices)
    removed_idxs = []
    for atom in rdmol.GetAtoms():
        if len(atom.GetBonds()) > 3 and atom.GetAtomicNum() > 10:
            removed_idxs.append(atom.GetIdx())
            detached.append((Chem.rdchem.Atom(atom),
                             [nbr.GetIdx() for nbr in atom.GetNeighbors()]))
    removed_idxs.sort()
    removed_set = set(removed_idxs)
    # Highest index first, so pending removals keep their index.
    for idx in reversed(removed_idxs):
        rdmol.RemoveAtom(idx)

    def current_index(old_idx):
        # Each removed atom below old_idx shifted it down by one.
        return old_idx - sum(1 for gone in removed_idxs if gone < old_idx)

    # calculating geometry
    if AllChem.EmbedMolecule(rdmol) == -1:
        raise ValueError('RDKit could not embed the molecule; '
                         'no conformer was generated')

    # fixing coordinates for fragments: move each fragment by its own
    # random offset (the same value along x, y and z)
    frags_idxs = list(Chem.rdmolops.GetMolFrags(rdmol))
    conf = rdmol.GetConformer()
    if len(frags_idxs) > 1:
        for frag_idxs in frags_idxs:
            diff = (0.5 - np.random.rand()) * 5 * np.ones(3)
            for idx in frag_idxs:
                vec = conf.GetAtomPosition(idx)
                vec = np.array([vec.x, vec.y, vec.z]) + diff
                conf.SetAtomPosition(
                    idx, Point3D(float(vec[0]), float(vec[1]), float(vec[2])))

    # adding dative atoms and setting coords
    for atom, old_nbrs in detached:
        # Neighbours that were stripped themselves no longer exist.
        nbr_idxs = [current_index(n) for n in old_nbrs
                    if n not in removed_set]
        coords = np.zeros(3)
        for nbr_idx in nbr_idxs:
            vec = conf.GetAtomPosition(nbr_idx)
            coords += np.array([vec.x, vec.y, vec.z])
        new_idx = rdmol.AddAtom(atom)
        if nbr_idxs:
            coords = coords / len(nbr_idxs)
        conf.SetAtomPosition(
            new_idx,
            Point3D(float(coords[0]), float(coords[1]), float(coords[2])))
        for nbr_idx in nbr_idxs:
            # Begin/end order (restored atom first) follows the original.
            rdmol.AddBond(new_idx, nbr_idx,
                          order=Chem.rdchem.BondType.DATIVE)

    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSymmSSSR(rdmol)
    return rdmol
def substructure_centre(mol, mol_sub):
    """
    Return the geometric centre of a set of atoms.

    This function takes a molecule and a list of atom indices in that
    molecule and returns an RDKit Point3D representing the geometric
    centre of the atoms in the list, using the molecule's default
    conformer.

    :param mol: molecule with a conformer
    :param mol_sub: sized iterable of atom indices; must be non-empty,
        since the coordinate sum is divided by its length
    :return: ``Point3D`` at the mean of the listed atoms' positions
    """
    # The conformer is fetched once instead of on every iteration, and the
    # accumulator no longer shadows the builtin ``sum``.
    conformer = mol.GetConformer()
    centre = Point3D()
    for atom_idx in mol_sub:
        centre += conformer.GetAtomPosition(atom_idx)
    return centre / len(mol_sub)
def add_canvas_atom(self, point):
    """
    Add an atom of type ``self.atomtype`` to the molecule at ``point``.

    Works on an editable copy of ``self.mol`` and assigns that copy back to
    ``self.mol`` at the end.

    :param point: object with ``x`` and ``y`` attributes; when the molecule
        is empty its ``x`` and ``y`` are overwritten with 0.0, so the first
        atom is always placed at the origin
    """
    editable = Chem.rdchem.RWMol(self.mol)
    if editable.GetNumAtoms() == 0:
        point.x = point.y = 0.0

    atom_index = editable.AddAtom(Chem.rdchem.Atom(self.atomtype))

    # This should only trigger if we have an empty canvas
    if editable.GetNumConformers() == 0:
        rdDepictor.Compute2DCoords(editable)

    # The position goes into conformer 0 with z fixed at 0.
    editable.GetConformer(0).SetAtomPosition(atom_index,
                                             Point3D(point.x, point.y, 0))
    self.mol = editable
def _place_between(self, a: int, b: int, aromatic: Optional[bool] = None, atomic_number: int = 6) -> None:
    """
    Insert a new atom at the midpoint of the bond between atoms a and b.

    Operates on ``self.rwmol`` in place: the a-b bond is removed and
    replaced by a-new and b-new bonds, and the new atom and both original
    atoms are tagged with the boolean property ``_Novel``. If there is no
    bond between a and b, a critical message is written to ``self.journal``
    and nothing is changed.

    :param a: index of atom A
    :param b: index of atom B
    :param aromatic: bond type of the two new bonds: ``True`` = aromatic,
        ``False`` = single, anything else (``None``) = copy the original
        bond's type. Independently, a truthy value flags all three atoms
        as aromatic.
    :param atomic_number: element of the new atom. Carbon is 6.
    :return: None
    """
    rwmol = self.rwmol
    original_bond = rwmol.GetBondBetweenAtoms(a, b)
    if original_bond is None:
        self.journal.critical(
            f'FAIL. There should be a bond btween {a} and {b}')
        return None  # fail

    bond_type = (Chem.BondType.AROMATIC if aromatic is True else
                 Chem.BondType.SINGLE if aromatic is False else
                 original_bond.GetBondType())

    new_idx = rwmol.AddAtom(Chem.Atom(atomic_number))
    involved = [rwmol.GetAtomWithIdx(index) for index in (new_idx, a, b)]
    if aromatic:
        for atom in involved:
            atom.SetIsAromatic(True)
    # prevent constraints
    for atom in involved:
        atom.SetBoolProp('_Novel', True)

    # fix position: midpoint of A and B
    conformer = rwmol.GetConformer()
    first = conformer.GetAtomPosition(a)
    second = conformer.GetAtomPosition(b)
    midpoint = Point3D(first.x / 2 + second.x / 2,
                       first.y / 2 + second.y / 2,
                       first.z / 2 + second.z / 2)
    conformer.SetAtomPosition(new_idx, midpoint)

    # fix bonds: A-B becomes A-new-B
    rwmol.RemoveBond(a, b)
    for end in (a, b):
        rwmol.AddBond(end, new_idx, bond_type)
def add_conformer(self, mol, conformer_coordinates):
    """
    Attach a new conformer built from ``conformer_coordinates`` to ``mol``.

    :param mol: The rdkit molecule instance; it is modified in place
    :param conformer_coordinates: A numpy array of the coordinates to be
        added; entry ``i`` is unpacked into the position of atom ``i``
    :return: The same rdkit molecule, now holding the extra conformer
        (RDKit assigns the conformer id)
    """
    new_conformer = Chem.Conformer()
    for atom_index, xyz in enumerate(conformer_coordinates):
        new_conformer.SetAtomPosition(atom_index, Point3D(*xyz))
    mol.AddConformer(new_conformer, assignId=True)
    return mol
def _place_ring_atoms(self, mol, rings):
    """
    Add the atoms described by ``rings`` to ``mol`` unless already present.

    For each position of each ring, ``self._get_expansion_for_atom`` gives
    a dict with the keys ``ori_i``, ``element``, ``x``, ``y``, ``z`` and
    ``ori_name``. If ``self._is_present`` reports that ``ori_i`` is already
    in ``mol`` nothing is added (a message is printed when
    ``self._debug_draw`` is set). Otherwise a new atom of that element is
    added at (x, y, z) and the dict values are stored on it as the
    properties ``_ori_i``, ``_x``, ``_y``, ``_z`` and ``_ori_name``.

    :param mol: editable molecule with a conformer; modified in place
    :param rings: iterable of ring descriptions, each with an ``elements``
        entry whose length is the number of ring atoms
    """
    conformer = mol.GetConformer()
    for ring in rings:
        # atom addition
        for position in range(len(ring['elements'])):
            info = self._get_expansion_for_atom(ring, position)
            if self._is_present(mol, info['ori_i']):
                existing = self._get_new_index(mol, info['ori_i'],
                                               search_collapsed=False)
                if self._debug_draw:
                    print(f"{existing} (formerly {info['ori_i']} existed already!")
                continue
            new_idx = mol.AddAtom(Chem.Atom(info['element']))
            new_atom = mol.GetAtomWithIdx(new_idx)
            conformer.SetAtomPosition(
                new_idx, Point3D(info['x'], info['y'], info['z']))
            new_atom.SetIntProp('_ori_i', info['ori_i'])
            for axis in ('x', 'y', 'z'):
                new_atom.SetDoubleProp('_' + axis, info[axis])
            new_atom.SetProp('_ori_name', info['ori_name'])
def add_conformer(rdkit_mol: Chem.Mol, conformer_coordinates: np.ndarray) -> Chem.Mol:
    """
    Attach a new conformer built from the given coordinates.

    Args:
        rdkit_mol: The rdkit molecule instance; it is modified in place.
        conformer_coordinates: A numpy array of the coordinates to be
            added; row ``i`` is unpacked into the position of atom ``i``.

    return:
        The same rdkit molecule, now holding the extra conformer (RDKit
        assigns the conformer id).
    """
    new_conformer = Chem.Conformer()
    for atom_index, xyz in enumerate(conformer_coordinates):
        position = Point3D(*xyz)
        new_conformer.SetAtomPosition(atom_index, position)
    rdkit_mol.AddConformer(new_conformer, assignId=True)
    return rdkit_mol
def z_rotation(vector, theta):
    """Rotates 3-D vector around z-axis

    :param vector: object with ``x``, ``y`` and ``z`` attributes
    :param theta: rotation angle as understood by ``np.cos``/``np.sin``
        (radians)
    :return: new ``Point3D`` holding the rotated coordinates; ``vector``
        itself is not modified
    """
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rotation = np.array([[cos_t, -sin_t, 0],
                         [sin_t, cos_t, 0],
                         [0, 0, 1]])
    rotated = np.dot(rotation, [vector.x, vector.y, vector.z])
    return Point3D(rotated[0], rotated[1], rotated[2])
def collapse_ring(self, mol: Chem.Mol) -> Chem.Mol:
    """
    Collapses a ring(s) into a single dummy atom(s).
    Stores data as JSON in the atom.

    For every ring reported by ``mol.GetRingInfo().AtomRings()`` a carbon
    atom is added at the mean position of the ring atoms. That atom carries
    the ring's description as properties: ``_ori_i`` (always -1),
    ``_ori_name`` (the molecule's ``_Name`` or '???'), and JSON-encoded
    ``_ori_is`` (ring atom indices), ``_neighbors`` (neighbour indices per
    ring atom), ``_bonds`` (bond type names per ring atom, parallel to
    ``_neighbors``), ``_xs``/``_ys``/``_zs`` (coordinates) and
    ``_elements`` (symbols). The dummy atom is then bonded to whatever the
    ring atoms were bonded to outside the ring, and finally all ring atoms
    are removed.

    NOTE(review): the nesting of this function was reconstructed from
    whitespace-collapsed text; in particular the placement of ``break``
    relative to the innermost ``if`` should be confirmed against the
    upstream file.

    :param mol: molecule with a conformer; it is copied, not modified
        (apart from whatever ``self.store_positions`` does to it)
    :return: the collapsed molecule
    :raises ValueError: if a neighbour belongs to a ring but no *other*
        ring centre is recorded for it
    """
    # presumably records the original coordinates on the atoms — not
    # visible in this block; verify against store_positions
    self.store_positions(mol)
    mol = Chem.RWMol(mol)
    conf = mol.GetConformer()
    center_idxs = []  # index of the dummy atom of each ring, in ring order
    morituri = []  # indices of all ring atoms, i.e. atoms to be removed
    old2center = defaultdict(list)  # ring atom index -> its dummy atom(s)
    for atomset in mol.GetRingInfo().AtomRings():
        morituri.extend(atomset)
        neighs = []
        neighbonds = []  # never used below
        bonds = []
        xs = []
        ys = []
        zs = []
        elements = []
        # add elemental ring
        c = mol.AddAtom(Chem.Atom('C'))
        center_idxs.append(c)
        central = mol.GetAtomWithIdx(c)
        name = mol.GetProp('_Name') if mol.HasProp('_Name') else '???'
        # (the trailing comma only wraps the call's result in a discarded
        # one-element tuple)
        central.SetProp('_ori_name', name),
        # get data for storage
        for i in atomset:
            old2center[i].append(c)
            atom = mol.GetAtomWithIdx(i)
            neigh_i = [a.GetIdx() for a in atom.GetNeighbors()]
            neighs.append(neigh_i)
            # bond type names, in the same order as neigh_i
            bond = [mol.GetBondBetweenAtoms(i, j).GetBondType().name for j in neigh_i]
            bonds.append(bond)
            pos = conf.GetAtomPosition(i)
            xs.append(pos.x)
            ys.append(pos.y)
            zs.append(pos.z)
            elements.append(atom.GetSymbol())
        # store data in elemental ring
        central.SetIntProp('_ori_i', -1)
        central.SetProp('_ori_is', json.dumps(atomset))
        central.SetProp('_neighbors', json.dumps(neighs))
        central.SetProp('_xs', json.dumps(xs))
        central.SetProp('_ys', json.dumps(ys))
        central.SetProp('_zs', json.dumps(zs))
        central.SetProp('_elements', json.dumps(elements))
        central.SetProp('_bonds', json.dumps(bonds))
        # place the dummy atom at the mean x, y and z of the ring atoms
        conf.SetAtomPosition(c, Point3D(*[sum(axis) / len(axis) for axis in (xs, ys, zs)]))
    for atomset, center_i in zip(mol.GetRingInfo().AtomRings(), center_idxs):
        # bond to elemental ring
        central = mol.GetAtomWithIdx(center_i)
        neighss = json.loads(central.GetProp('_neighbors'))
        bondss = json.loads(central.GetProp('_bonds'))
        for neighs, bonds in zip(neighss, bondss):
            for neigh, bond in zip(neighs, bonds):
                # only bonds leaving this ring matter
                if neigh not in atomset:
                    bt = getattr(Chem.BondType, bond)
                    if neigh not in morituri:
                        # neighbour survives: bond the dummy atom to it
                        mol.AddBond(center_i, neigh, bt)
                    else:
                        # neighbour is in another ring: bond the two dummy
                        # atoms instead, unless they are bonded already
                        for other_center_i in old2center[neigh]:
                            if center_i != other_center_i:
                                if not mol.GetBondBetweenAtoms(center_i, other_center_i):
                                    mol.AddBond(center_i, other_center_i, bt)
                                break
                        else:
                            raise ValueError(f'Cannot find what {neigh} became')
    # highest original index first; presumably _get_new_index translates an
    # original index into the atom's current index — verify against helper
    for i in sorted(set(morituri), reverse=True):
        mol.RemoveAtom(self._get_new_index(mol, i))
    return mol.GetMol()
def _join_atoms(self, combo: Chem.RWMol, anchor_A: int, anchor_B: int, distance: float, linking: bool = True):
    """
    Bond two anchor atoms, inserting linker atoms between them if needed.

    The number of atoms to insert is ``round(distance / 1.22) - 1``; their
    positions are evenly spaced on the straight line between the anchors.
    An anchor whose ``_ori_i`` property is -1 (a ring marker atom) shortens
    ``distance`` by 1.55 and drops the inserted atom next to it.

    Depending on ``linking`` and the number of atoms to insert:

    * ``linking is True``, atoms to insert: a chain of new atoms is added
      (oxygen next to a carbon anchor A, and as the last atom next to a
      carbon anchor B when that atom's position in the chain is above 2;
      carbon otherwise), all new bonds labelled 'linker';
    * ``linking is True``, nothing to insert: direct single bond labelled
      'main_novel';
    * ``linking is False``, nothing to insert: direct single bond labelled
      'other_novel';
    * ``linking is False``, atoms to insert: only a warning is logged.

    :param combo: editable molecule with a conformer; modified in place
    :param anchor_A: index of the first anchor atom
    :param anchor_B: index of the second anchor atom
    :param distance: distance between the anchors
    :param linking: see above; must be exactly ``True`` or ``False``
    :return: ``combo.GetMol()``
    :raises ConnectionError: if the (ring-corrected) distance exceeds
        ``self.joining_cutoff``
    :raises ValueError: if ``linking`` is neither ``True`` nor ``False``
    """
    conformer = combo.GetConformer()
    start = conformer.GetAtomPosition(anchor_A)
    end = conformer.GetAtomPosition(anchor_B)
    n_new = int(round(distance / 1.22) - 1)
    # Interior points only: the two end points are the anchors themselves.
    xs, ys, zs = (np.linspace(first, last, n_new + 2)[1:-1]
                  for first, last in ((start.x, end.x),
                                      (start.y, end.y),
                                      (start.z, end.z)))

    # correcting for ring marker atoms
    def is_ring_atom(anchor: int) -> bool:
        candidate = combo.GetAtomWithIdx(anchor)
        return bool(candidate.HasProp('_ori_i')
                    and candidate.GetIntProp('_ori_i') == -1)

    def bond_and_label(first: int, second: int, label: str) -> None:
        # Single bond between the two atoms, tagged with its provenance.
        combo.AddBond(first, second, Chem.BondType.SINGLE)
        BondProvenance.set_bond(combo.GetBondBetweenAtoms(first, second),
                                label)

    if is_ring_atom(anchor_A):
        # Arbitrary + 0.2 to compensate for the ring not reaching
        # (out of plane).
        distance -= 1.35 + 0.2
        n_new -= 1
        xs, ys, zs = xs[1:], ys[1:], zs[1:]
    if is_ring_atom(anchor_B):
        # Arbitrary + 0.2 to compensate for the ring not reaching
        # (out of plane).
        distance -= 1.35 + 0.2
        n_new -= 1
        xs, ys, zs = xs[:-1], ys[:-1], zs[:-1]

    # notify that things could be leary.
    if distance < 0:
        self.journal.debug(
            f'Two ring atoms detected to be close. Joining for now.' +
            ' They will be bonded/fused/spiro afterwards')

    # check if valid.
    if distance > self.joining_cutoff:
        msg = f'Atoms {anchor_A}+{anchor_B} are {distance} Å away. Cutoff is {self.joining_cutoff}.'
        self.journal.warning(msg)
        raise ConnectionError(msg)

    # place new atoms
    self.journal.debug(
        f'Molecules will be joined via atoms {anchor_A}+{anchor_B} ({distance} Å) via the addition of {n_new} atoms.'
    )
    needs_linker = n_new > 0
    if linking is True:
        if needs_linker:
            previous = anchor_A
            last_position = n_new - 1
            for position in range(n_new):
                # make oxygen the first and last bridging atom.
                if (position == 0
                        and combo.GetAtomWithIdx(anchor_A).GetSymbol() == 'C'):
                    new_atomic = 8
                elif (position > 2 and position == last_position
                      and combo.GetAtomWithIdx(anchor_B).GetSymbol() == 'C'):
                    new_atomic = 8
                else:
                    new_atomic = 6
                idx = combo.AddAtom(Chem.Atom(new_atomic))
                added = combo.GetAtomWithIdx(idx)
                added.SetBoolProp('_Novel', True)
                added.SetIntProp('_ori_i', 999)
                conformer.SetAtomPosition(
                    idx,
                    Point3D(float(xs[position]), float(ys[position]),
                            float(zs[position])))
                bond_and_label(idx, previous, 'linker')
                previous = idx
            bond_and_label(previous, anchor_B, 'linker')
        else:
            bond_and_label(anchor_A, anchor_B, 'main_novel')
    elif linking is False:
        if needs_linker:
            self.journal.warning(
                f'Was going to bond {anchor_A} and {anchor_B} but reconsidered.'
            )
        else:
            bond_and_label(anchor_A, anchor_B, 'other_novel')
    else:
        raise ValueError('Impossible')
    return combo.GetMol()
def calculate_charges(smiles, resp_type, overwrite=False):
    """Run the charge calculation for the molecule defined by smiles

    Steps, as visible in this function: check that the molecule's
    directory exists under ``LIB_PATH``; refuse to run if a ``results``
    directory already exists (unless ``overwrite``); read the settings
    returned by ``_initialize_resp``; embed up to 500 conformers and keep
    ``n_conformers`` of them chosen at random; geometry-optimize each with
    ``_geometry_optimize``; write the optimized geometries to a PDB file;
    run ``_perform_resp`` once without PCM and, for RESP2, once more with
    ``pcm=True``; write the results with ``_write_results``.

    Parameters
    ----------
    smiles: str
        the smiles string of the desired molecule
    resp_type: str
        the type of RESP to perform (RESP1 or RESP2); case-insensitive
    overwrite: boolean, optional, default=False
        overwrite the previous results if they exist

    Returns
    -------
    None

    Raises
    ------
    InvalidMoleculeError
        if the smiles string is invalid (not raised in this function
        directly; presumably by ``canonicalize_smiles`` — verify)
    RESPLibraryError
        if the molecule directory does not exist, if results already exist
        and ``overwrite`` is False, or if ``resp_type`` is not RESP1/RESP2
    ValueError
        if fewer than ``n_conformers`` conformers could be generated
    AssertionError
        if the loaded input disagrees with ``smiles`` or ``resp_type``
    """
    smiles = canonicalize_smiles(smiles)
    iupac_name = smiles_to_iupac(smiles)
    mol_path = Path.joinpath(LIB_PATH, iupac_name)
    if not mol_path.is_dir():
        raise RESPLibraryError(
            f"The directory for the molecule: '{smiles}' with name "
            f"{iupac_name} does not exist. Please run "
            "resp_library.prepare_charge_calculation() first.")

    # Initialize RESP stuff
    resp_type = resp_type.upper()
    resp_path = Path.joinpath(mol_path, resp_type)
    if not overwrite:
        if Path.joinpath(resp_path, "results").is_dir():
            raise RESPLibraryError(
                "It appears that this partial charge calculation "
                "has already been attempted or performed. If you wish "
                "to re-run this calculation, please remove the 'results', "
                "'structures', and 'esp_grids' directories before proceeding.")
    inp, log = _initialize_resp(resp_path)

    # Molecule definition
    # NOTE(review): asserts are stripped under ``python -O``; these two
    # consistency checks would then silently disappear.
    assert inp['smiles'] == smiles
    assert inp['resp']['type'].upper() == resp_type

    # Parse the YAML file
    n_conformers = inp['conformer_generation']['n_conformers']
    rms_threshold = inp['conformer_generation']['rms_threshold']
    energy_threshold = inp['conformer_generation'][
        'energy_threshold']  # kJ/mol
    conformer_seed = inp['conformer_generation']['random_seed']
    charge_constraints = inp['resp']['charge_constraints']
    equality_constraints = inp['resp']['equality_constraints']
    point_density = inp['resp']['point_density']
    vdw_scale_factors = inp['resp']['vdw_scale_factors']

    # Method and basis set used for the ESP depend on the RESP flavour.
    if resp_type == "RESP1":
        esp_method = "hf"
        esp_basis_set = "6-31g*"
    elif resp_type == "RESP2":
        esp_method = "pw6b95"
        esp_basis_set = "aug-cc-pV(D+d)Z"
    else:
        raise RESPLibraryError(
            "Invalid RESP type. Only 'RESP1' and 'RESP2' are supported.")

    # Create the molecule, add H's
    rdmol = Chem.MolFromSmiles(smiles)
    rdmol = Chem.AddHs(rdmol)

    # Get the net charge
    net_charge = Chem.rdmolops.GetFormalCharge(rdmol)
    log.log(f"The net charge is {net_charge}.")

    # Get the elements
    elements = [a.GetSymbol() for a in rdmol.GetAtoms()]
    # One Bondi radius per distinct element symbol.
    vdw_radii = {
        elem_sym: ele.element_from_symbol(elem_sym).radius_bondi
        for elem_sym in elements
    }

    # Generate conformers
    cids = AllChem.EmbedMultipleConfs(rdmol,
                                      numConfs=500,
                                      pruneRmsThresh=rms_threshold,
                                      randomSeed=conformer_seed)
    AllChem.AlignMolConformers(rdmol)
    if len(cids) < n_conformers:
        raise ValueError(
            "Not enough conformers found. Please reduce the "
            "'rms_threshold' or the 'n_conformers'. For molecules "
            "with < 5 atoms it may be difficult to generate more "
            "than 2 conformers.")

    # Select n_conformers at random
    np.random.seed(conformer_seed)
    conformer_ids = np.random.choice([i for i in range(len(cids))],
                                     size=n_conformers,
                                     replace=False)
    # assumes the conformer ids in cids are exactly 0..len(cids)-1, since
    # positions are used as ids below — TODO confirm
    remove = [i for i in range(len(cids)) if i not in conformer_ids]
    for idx in remove:
        rdmol.RemoveConformer(idx)

    # Renumber conformers
    for idx, c in enumerate(rdmol.GetConformers()):
        c.SetId(idx)

    optimized_p4molecules = []
    optimized_energies = []

    # For each conformer, geometry optimize with psi4
    for conformer in rdmol.GetConformers():
        p4mol = build_p4mol(elements, conformer.GetPositions(), net_charge)
        p4mol, energy = _geometry_optimize(p4mol, resp_type)

        # Save optimized structure and energy
        optimized_p4molecules.append(p4mol)
        optimized_energies.append(energy)

        # Extract optimized coordinates; update coordinates RDKIT molecule
        # (scaled by BOHR_TO_ANGSTROM; presumably psi4 reports bohr)
        coords = p4mol.geometry().to_array() * BOHR_TO_ANGSTROM
        for i in range(rdmol.GetNumAtoms()):
            x, y, z = coords[i]
            conformer.SetAtomPosition(i, Point3D(x, y, z))

    # Check energies and remove high energy conformers
    _check_relative_conformer_energies(rdmol, optimized_p4molecules,
                                       optimized_energies, energy_threshold,
                                       log)

    # Align conformers for easier visual comparison
    _save_aligned_conformers(rdmol, log)

    # Save the conformers used for resp
    # NOTE(review): relative path — this relies on the current working
    # directory; presumably _initialize_resp changes into resp_path. Confirm.
    Path("structures/optimized_geometries.pdb").write_text(
        Chem.rdmolfiles.MolToPDBBlock(rdmol))
    log.log(
        "Wrote the final optimized gemoetries to 'optimized_geometries.pdb'.\n\n"
    )

    # Finally we do multi-conformer RESP
    # First pass: pcm=False, results written under the "vacuum" label.
    pcm = False
    charges = _perform_resp(
        optimized_p4molecules,
        charge_constraints,
        equality_constraints,
        esp_method,
        esp_basis_set,
        pcm,
        point_density,
        vdw_radii,
        vdw_scale_factors,
        log,
    )
    _write_results(elements, charges, equality_constraints, "vacuum", log)

    # RESP2 only: second pass with pcm=True, written under the "pcm" label.
    if resp_type == "RESP2":
        pcm = True
        charges = _perform_resp(
            optimized_p4molecules,
            charge_constraints,
            equality_constraints,
            esp_method,
            esp_basis_set,
            pcm,
            point_density,
            vdw_radii,
            vdw_scale_factors,
            log,
        )
        _write_results(elements, charges, equality_constraints, "pcm", log)

    # NOTE(review): only reached on success; any exception above leaves
    # the log unclosed. Consider try/finally.
    log.close()
def apply_shift(mol: rdkit.Mol, shift, conformer=-1):
    """
    Shifts the coordinates of all atoms.

    This does not modify the molecule. A modified copy is returned.

    Parameters
    ----------
    mol : :class:`rdkit.Chem.rdchem.Mol`
        The molecule whose atoms are to be shifted.

    shift : :class:`numpy.array`
        A numpy array holding the value of the shift along each
        axis.

    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    :class:`rdkit.Chem.rdchem.Mol`
        A copy of the molecule where the coordinates have been
        shifted by `shift`. The copy holds exactly one conformer:
        the shifted one.

    """
    # Work on a copy of the conformer so the input stays untouched.
    shifted_conf = rdkit.Conformer(mol.GetConformer(conformer))

    # Move every atom of the copied conformer by `shift`.
    for atom_id in (atom.GetIdx() for atom in mol.GetAtoms()):
        moved = np.array(shifted_conf.GetAtomPosition(atom_id)) + shift
        shifted_conf.SetAtomPosition(atom_id, Point3D(*moved))

    # Copying the molecule also copies its conformers, so those are
    # dropped before the shifted conformer is attached.
    shifted_mol = rdkit.Mol(mol)
    shifted_mol.RemoveAllConformers()
    shifted_mol.AddConformer(shifted_conf)
    return shifted_mol
def SampleDist(Heads, Anchors, Linkers, n=200,
               output_hist="initial_distances.hist", hist_threshold=0.75,
               min_margin=2, homo_protac=False):
    """
    Sample constrained linker conformations and histogram anchor distances.

    For every integer distance ``b`` (starting at 1) the two heads are
    randomly rotated, head B is translated using ``Point3D(b, 0, 0)``, and
    the linker molecule is embedded with half of the head atoms (chosen
    at random) used as coordinate constraints. A conformation is accepted
    when the rounded distance between the two anchor atoms equals ``b``.
    Sampling stops at the first ``b >= 10`` that yields no accepted
    conformation.

    Side effects: reseeds the global ``random`` module with 0, writes
    accepted conformations to ``random_sampling.sdf``, writes the
    histogram to `output_hist`, and prints diagnostic SMILES/matches.

    Parameters
    ----------
    Heads : sequence of two paths
        SDF files of head A and head B; the first molecule of each is used.
    Anchors : sequence of two ints
        ``Anchors[0]`` is the anchor atom index in head A and
        ``Anchors[1]`` the anchor atom index in head B.
    Linkers : path
        Text file; the first whitespace-separated token of its first line
        is parsed as the SMILES of the full molecule.
    n : int, optional
        Number of embedding attempts per distance value.
    output_hist : path, optional
        File receiving one ``distance<TAB>count`` line per distance.
    hist_threshold : float, optional
        Fraction of the highest count a distance must reach to be
        considered well populated.
    min_margin : number, optional
        Minimum margin kept on each side of the mean distance.
    homo_protac : bool, optional
        If true, both heads are located by direct substructure matching
        (first match for head A, second match for head B) instead of via
        a maximum common substructure.

    Returns
    -------
    tuple
        ``(None, None)`` if a head cannot be located in the linker
        molecule, ``(0, 0)`` if no conformation was accepted, otherwise
        ``(low, high)`` bounds on the anchor-anchor distance.
    """
    writer = Chem.SDWriter("random_sampling.sdf")
    # The writer is closed in `finally` so that the output is flushed on
    # every exit path, including the early returns below.
    try:
        random.seed(0)
        [HeadA_sdf, HeadB_sdf] = Heads
        # Only the first line of the linkers file is read.
        with open(Linkers, 'r') as f:
            linkers = [Chem.MolFromSmiles(f.readline().split()[0])]
        # Loading the heads sdf files.
        HeadA = Chem.SDMolSupplier(HeadA_sdf)[0]
        HeadB = Chem.SDMolSupplier(HeadB_sdf)[0]
        origin = Point3D(0, 0, 0)
        # NOTE(review): presumably these calls move each head so that its
        # anchor atom sits at the origin - confirm against translateMol.
        anchor_a = HeadA.GetConformer().GetAtomPosition(Anchors[0])
        translateMol(HeadA, origin, anchor_a)
        anchor_b = HeadB.GetConformer().GetAtomPosition(Anchors[1])
        translateMol(HeadB, origin, anchor_b)

        histogram = {}
        for linker in linkers:
            # Homo protacs are protacs with the same binder twice, causing
            # self degradation of an E3 ligase.
            if homo_protac:
                matches_A = linker.GetSubstructMatches(HeadA)
                matches_B = linker.GetSubstructMatches(HeadB)
                if len(matches_A) < 1 or len(matches_B) < 2:
                    return (None, None)
                # Head A is the first occurrence, head B the second one.
                head_A_list = [matches_A[0]]
                head_B_list = [matches_B[1]]
                # The head itself is the query, so position k of a match
                # corresponds to atom k of the head.
                head_A_inner = tuple(range(len(matches_A[0])))
                head_B_inner = tuple(range(len(matches_B[1])))
            else:
                mcs_A = rdFMCS.FindMCS([linker, HeadA])
                mcs_patt_A = Chem.MolFromSmarts(mcs_A.smartsString)
                mcs_B = rdFMCS.FindMCS([linker, HeadB])
                mcs_patt_B = Chem.MolFromSmarts(mcs_B.smartsString)
                head_A_list = linker.GetSubstructMatches(mcs_patt_A,
                                                         uniquify=False)
                head_A_inner = HeadA.GetSubstructMatch(mcs_patt_A)
                head_B_list = linker.GetSubstructMatches(mcs_patt_B,
                                                         uniquify=False)
                head_B_inner = HeadB.GetSubstructMatch(mcs_patt_B)
                print(Chem.MolToSmiles(linker))
                print(Chem.MolToSmiles(HeadB))
                print(head_B_list)
                if len(head_A_list) == 0 or len(head_B_list) == 0:
                    return (None, None)

            histogram = {}
            seed = 0
            b = 1
            while True:
                b_counter = 0
                for _ in range(n):
                    head_A = random.choice(head_A_list)
                    head_B = random.choice(head_B_list)
                    seed += 1
                    NewA = copy.deepcopy(HeadA)
                    NewB = copy.deepcopy(HeadB)
                    randomRotateMol(NewA)
                    randomRotateMol(NewB)
                    translateMol(NewB, Point3D(b, 0, 0), origin)
                    conf_A = NewA.GetConformer()
                    conf_B = NewB.GetConformer()
                    # The constraints for the conformation generation
                    # using the two randomized heads.
                    cmap = {
                        atom: conf_A.GetAtomPosition(head_A_inner[k])
                        for k, atom in enumerate(head_A)
                    }
                    cmap.update({
                        atom: conf_B.GetAtomPosition(head_B_inner[k])
                        for k, atom in enumerate(head_B)
                    })
                    # Only half of the atoms are required to make the
                    # constrained embedding. This is done because using
                    # all the atoms sometimes makes it impossible to find
                    # solutions; the half is chosen randomly for each
                    # generation.
                    cmap_tag = random.sample(list(cmap), len(cmap) // 2)
                    cmap_tag = {ctag: cmap[ctag] for ctag in cmap_tag}
                    if AllChem.EmbedMolecule(linker,
                                             coordMap=cmap_tag,
                                             randomSeed=seed,
                                             useBasicKnowledge=True,
                                             maxAttempts=1) == -1:
                        continue
                    # NOTE(review): the match tuples are indexed with the
                    # heads' anchor atom indices; this assumes the match
                    # order follows the heads' atom order - confirm for
                    # the MCS-based matches.
                    distance = rdMolTransforms.GetBondLength(
                        linker.GetConformer(),
                        head_A[Anchors[0]],
                        head_B[Anchors[1]])
                    if int(round(distance)) == b:
                        writer.write(linker)
                        b_counter += 1
                histogram[b] = b_counter
                if b >= 10 and b_counter == 0:
                    break
                b += 1

        with open(output_hist, 'w') as f:
            for h in histogram:
                f.write(str(h) + "\t" + str(histogram[h]) + '\n')

        max_value = max(histogram.values())
        sum_mul = sum(dist * count for dist, count in histogram.items())
        sum_his = sum(histogram.values())
        if sum_his == 0:
            return (0, 0)
        # Count-weighted mean distance.
        avg_index = 1.0 * sum_mul / sum_his
        threshold = max_value * hist_threshold
        high_values = [i for i in histogram if histogram[i] >= threshold]
        return (min(min(high_values), avg_index - min_margin),
                max(max(high_values), avg_index + min_margin))
    finally:
        writer.close()
def GenConstConf(Heads, Docked_Heads, Head_Linkers, output_sdf, Anchor_A,
                 v_atoms_sdf, n=100, homo_protac=False):
    """
    Generate conformations of a full molecule constrained to docked heads.

    The molecule read from `Head_Linkers` is repeatedly embedded with half
    of the docked head atoms (chosen at random) used as coordinate
    constraints, aligned onto the docked heads, and written to
    `output_sdf` when both heads align with an RMSD below 0.5. Up to `n`
    conformations are written; the search gives up after ``10 * n``
    attempts, or after `n` attempts if nothing has been accepted yet.

    Three virtual atoms (atomic number 23) are also written to
    `v_atoms_sdf`: one at the mean position of the docked heads' atoms
    and two offset from it by +1 along x and +1 along y.

    Side effects: reseeds the global ``random`` module on every attempt
    and writes the two SDF files.

    Parameters
    ----------
    Heads : sequence of two paths
        SDF files of head A and head B; the first molecule of each is used.
    Docked_Heads : path
        SDF file whose first molecule holds the docked pose of both heads.
    Head_Linkers : path
        Text file; the first whitespace-separated token of its first line
        is parsed as the SMILES of the full molecule.
    output_sdf : path
        SDF file receiving the accepted conformations.
    Anchor_A : int or str
        Index into the head A match; converted with ``int``.
    v_atoms_sdf : path
        SDF file receiving the virtual atoms.
    n : int, optional
        Number of conformations to generate.
    homo_protac : bool, optional
        If true, head A is taken as the first and head B as the second
        substructure match instead of sampling among all matches.

    Returns
    -------
    tuple
        ``(head_A[int(Anchor_A)], v_atoms_sdf)`` where ``head_A`` is the
        head A match used in the last attempt.
    """
    writer = Chem.SDWriter(output_sdf)
    # Closed in `finally` so accepted conformations are flushed to disk on
    # every exit path.
    try:
        # Only the first line of the file is read.
        with open(Head_Linkers, 'r') as f:
            head_linkers = [Chem.MolFromSmiles(f.readline().split()[0])]
        # Loading the heads sdf files.
        HeadA = Chem.SDMolSupplier(Heads[0])[0]
        HeadB = Chem.SDMolSupplier(Heads[1])[0]
        docked_heads = Chem.SDMolSupplier(Docked_Heads)[0]
        docked_conf = docked_heads.GetConformer()

        # Virtual atoms around the (unweighted) mean atom position, for
        # the neighbor atom alignment.
        num_atoms = docked_conf.GetNumAtoms()
        positions = [docked_conf.GetAtomPosition(k) for k in range(num_atoms)]
        cx = sum(p.x for p in positions) / num_atoms
        cy = sum(p.y for p in positions) / num_atoms
        cz = sum(p.z for p in positions) / num_atoms
        v1 = Point3D(cx, cy, cz)
        v2 = Point3D(cx + 1, cy, cz)
        v3 = Point3D(cx, cy + 1, cz)
        virtual_atoms = Chem.MolFromSmarts('[#23][#23][#23]')
        # The embedding only serves to create a conformer whose
        # coordinates are then overwritten.
        Chem.rdDistGeom.EmbedMolecule(virtual_atoms)
        virtual_conf = virtual_atoms.GetConformer()
        virtual_conf.SetAtomPosition(1, v1)
        virtual_conf.SetAtomPosition(0, v2)
        virtual_conf.SetAtomPosition(2, v3)
        v_writer = Chem.SDWriter(v_atoms_sdf)
        try:
            v_writer.write(virtual_atoms)
        finally:
            # The path is handed back to the caller, so the file must be
            # complete before returning.
            v_writer.close()

        # Homo protacs are protacs with the same binder twice, causing
        # self degradation of an E3 ligase.
        if homo_protac:
            docked_A = docked_heads.GetSubstructMatches(HeadA)[0]
            docked_B = docked_heads.GetSubstructMatches(HeadB)[1]
        else:
            docked_A = docked_heads.GetSubstructMatch(HeadA)
            docked_B = docked_heads.GetSubstructMatch(HeadB)

        for head_linker in head_linkers:
            # NOTE(review): the result of AddHs is discarded, so
            # `head_linker` itself is used without the added hydrogens -
            # confirm whether that is intended.
            Chem.AddHs(head_linker)
            if homo_protac:
                head_A = head_linker.GetSubstructMatches(HeadA)[0]
                head_B = head_linker.GetSubstructMatches(HeadB)[1]
                # Single-candidate lists so the sampling loop below works
                # for homo protacs as well.
                head_A_list = [head_A]
                head_B_list = [head_B]
            else:
                head_A_list = head_linker.GetSubstructMatches(
                    HeadA, uniquify=False)
                head_B_list = head_linker.GetSubstructMatches(
                    HeadB, uniquify=False)

            i = 0
            seed = 0
            while i < n:
                if seed > 10 * n:
                    break
                if seed > n and i == 0:
                    break
                seed += 1
                random.seed(seed)
                head_A = random.choice(head_A_list)
                head_B = random.choice(head_B_list)

                # amap for final alignment.
                amap = [(head_A[j], atom) for j, atom in enumerate(docked_A)]
                amap.extend(
                    (head_B[j], atom) for j, atom in enumerate(docked_B))

                # The constraints for the conformation generation using
                # the two docked heads.
                cmap = {
                    head_A[j]: docked_conf.GetAtomPosition(atom)
                    for j, atom in enumerate(docked_A)
                }
                cmap.update({
                    head_B[j]: docked_conf.GetAtomPosition(atom)
                    for j, atom in enumerate(docked_B)
                })
                # Only half of the atoms are required to make the
                # constrained embedding. This is done because using all
                # the atoms sometimes makes it impossible to find
                # solutions; the half is chosen randomly for each
                # generation.
                cmap_tag = random.sample(list(cmap), len(cmap) // 2)
                cmap_tag = {ctag: cmap[ctag] for ctag in cmap_tag}
                if AllChem.EmbedMolecule(head_linker,
                                         coordMap=cmap_tag,
                                         randomSeed=seed,
                                         useBasicKnowledge=True,
                                         maxAttempts=10) == -1:
                    continue

                # Final alignment to bring the new conformation to the
                # position of the pose's heads. This is needed because the
                # constrained embedding only applies to distances and not
                # to atom positions.
                rdMolAlign.AlignMol(head_linker, docked_heads, atomMap=amap)
                # Make sure the alignment is good enough for both heads
                # (also to ensure the same isomer for ambiguous rings).
                if (rmsd(head_linker, docked_heads, head_A, docked_A) < 0.5
                        and rmsd(head_linker, docked_heads, head_B,
                                 docked_B) < 0.5):
                    writer.write(head_linker)
                    i += 1
        return head_A[int(Anchor_A)], v_atoms_sdf
    finally:
        writer.close()
def _get_conformer(mol: rdchem.Mol, conformer: str = "min", algo: str = "MMFF") -> rdchem.Mol:
    """Reduce a molecule to a single conformer chosen by the given rule.

    The molecule is modified in place: all of its conformers are removed
    and replaced by the selected one. The same object is returned.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest, ideally with multiple conformers.

    conformer: str, optional, default="min"
        Which conformer to select for 3D coordinates. If "min" (or "max"),
        then the conformer with the min (or max) energy is selected. If
        "first" or "last", then the first or last conformer is selected.
        If "avg", then the atom positions of all the conformers are
        averaged.

    algo: str, optional, default="MMFF"
        Which force field ("MMFF" or "UFF") to evaluate conformer energies
        with. Only used for "min" and "max". Read rdkit description to
        determine which one is best suited for your application.

    Returns:
    --------
    mol: rdkit.Chem.rdchem.Mol
        Molecule with conformer of interest.

    Raises:
    -------
    ValueError
        If `conformer` is not one of the supported options or the molecule
        has no conformers.
    KeyError
        If `algo` is not a supported force field (for "min"/"max").
    """
    forcefields = {
        "MMFF": rdForceFieldHelpers.MMFFOptimizeMoleculeConfs,
        "UFF": rdForceFieldHelpers.UFFOptimizeMoleculeConfs
    }

    available_confs = ["min", "max", "first", "last", "avg"]
    if conformer not in available_confs:
        raise ValueError(
            f"Cannot get `{conformer}` conformer. Choose from the "
            f"following {available_confs} conformer(s).")

    conformers = mol.GetConformers()
    if not conformers:
        raise ValueError("Molecule has no conformers to select from.")

    # Every branch takes a copy of the chosen conformer: a conformer that
    # is still owned by `mol` must not be used after
    # `RemoveAllConformers()` below.
    if conformer == "min":
        # The force field call yields one (status, energy) pair per
        # conformer; with maxIters=0 no optimization steps are requested.
        # Column 1 holds the energies.
        idx = np.argmin(forcefields[algo](mol, maxIters=0), axis=0)[1]
        conf = rdchem.Conformer(conformers[idx])
    elif conformer == "max":
        idx = np.argmax(forcefields[algo](mol, maxIters=0), axis=0)[1]
        conf = rdchem.Conformer(conformers[idx])
    elif conformer == "first":
        # Selected by position so that non-contiguous conformer ids work.
        conf = rdchem.Conformer(conformers[0])
    elif conformer == "last":
        conf = rdchem.Conformer(conformers[-1])
    else:  # "avg"
        allpos = [c.GetPositions() for c in conformers]
        avgpos = np.average(allpos, axis=0)

        # Use a copy of the first conformer as the carrier of the averaged
        # coordinates.
        conf = rdchem.Conformer(conformers[0])
        for atom_idx in range(conf.GetNumAtoms()):
            atom_coords = avgpos[atom_idx]
            conf.SetAtomPosition(
                atom_idx,
                Point3D(atom_coords[0], atom_coords[1], atom_coords[2]))

    # Save conformer, with the position specified.
    mol.RemoveAllConformers()
    mol.AddConformer(conf)
    return mol