def testGetSetDihedralThroughTripleBond(self):
    """Set the 6-1-2-9 dihedral of ``github1262_2.mol`` and check the read-back.

    The dihedral is set to 0, 120 and 180 degrees in turn; after each call
    the value returned by ``GetDihedralDeg`` must match to one decimal place.
    Atom-atom distances are sampled along the way:

    * the 6-9 distance at 0 degrees must equal the 6-7 distance measured at
      120 degrees (one decimal place);
    * the 6-9 distance at 180 degrees must differ from the one at 0 degrees.

    Finally, requesting the dihedral 6-0-3-9 must raise ``ValueError``.
    """
    mol_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                            'MolTransforms', 'test_data', 'github1262_2.mol')
    m = Chem.MolFromMolFile(mol_path, True, False)
    conf = m.GetConformer()

    rdmt.SetDihedralDeg(conf, 6, 1, 2, 9, 0.0)
    dihedral = rdmt.GetDihedralDeg(conf, 6, 1, 2, 9)
    self.assertAlmostEqual(dihedral, 0.0, 1)
    dist = rdmt.GetBondLength(conf, 6, 9)

    rdmt.SetDihedralDeg(conf, 6, 1, 2, 9, 120.0)
    dihedral = rdmt.GetDihedralDeg(conf, 6, 1, 2, 9)
    self.assertAlmostEqual(dihedral, 120.0, 1)
    # Deliberately a different atom pair (6-7) than the reference (6-9).
    dist2 = rdmt.GetBondLength(conf, 6, 7)
    self.assertAlmostEqual(dist, dist2, 1)

    rdmt.SetDihedralDeg(conf, 6, 1, 2, 9, 180.0)
    dihedral = rdmt.GetDihedralDeg(conf, 6, 1, 2, 9)
    self.assertAlmostEqual(dihedral, 180.0, 1)
    dist3 = rdmt.GetBondLength(conf, 6, 9)
    self.assertNotAlmostEqual(dist, dist3, 1)

    # The 6-0-3-9 atom quadruple is expected to be rejected.
    with self.assertRaises(ValueError):
        rdmt.SetDihedralDeg(conf, 6, 0, 3, 9, 0.0)
def testGetSetBondLength(self):
    """Read and overwrite the 0-19 distance of ``3-cyclohexylpyridine.mol``.

    The initial distance must be 1.36 (two decimal places). It is then set
    to 2.5 and to 3.0 (the second time with the atom order reversed) and
    must read back accordingly (one decimal place).
    """
    mol_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                            'MolTransforms', 'test_data',
                            '3-cyclohexylpyridine.mol')
    m = Chem.MolFromMolFile(mol_path, True, False)
    conf = m.GetConformer()

    dist = rdmt.GetBondLength(conf, 0, 19)
    # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which no
    # longer exists in current unittest releases.
    self.assertAlmostEqual(dist, 1.36, 2)

    rdmt.SetBondLength(conf, 0, 19, 2.5)
    dist = rdmt.GetBondLength(conf, 0, 19)
    self.assertAlmostEqual(dist, 2.5, 1)

    # Same pair with the indices swapped in the setter.
    rdmt.SetBondLength(conf, 19, 0, 3.0)
    dist = rdmt.GetBondLength(conf, 0, 19)
    self.assertAlmostEqual(dist, 3.0, 1)
def GenRandConf(Heads, Anchors, Linkers, n=1000, output_hist="initial_distances.hist", output_sdf="random_sampling.sdf"):
    """Embed random linker conformers and histogram the anchor-anchor distance.

    Args:
        Heads: pair ``[HeadA_sdf, HeadB_sdf]`` of SDF paths; the first
            molecule of each file is used.
        Anchors: pair of atom indices, one in each head, looked up in the
            atom maps returned by ``MCS_AtomMap``.
        Linkers: path to a text file; the first whitespace-separated token
            of its first line is parsed as a SMILES string.
        n: number of successfully embedded conformers to collect.
        output_hist: path of the text histogram to write.
        output_sdf: path of the SDF receiving every accepted conformer.

    Returns:
        ``(min_distance, max_distance)`` over all accepted conformers.

    Raises:
        ValueError: from ``min``/``max`` when no distance was collected
            (for example ``n <= 0``).
    """
    writer = Chem.SDWriter(output_sdf)
    # The writer is closed in ``finally`` so the SDF is flushed and the file
    # handle released even when embedding or atom mapping raises.
    try:
        [HeadA_sdf, HeadB_sdf] = Heads
        # linkers
        with open(Linkers, 'r') as f:
            linkers = [Chem.MolFromSmiles(f.readline().split()[0])]
        # loading the heads sdf files
        HeadA = Chem.SDMolSupplier(HeadA_sdf)[0]
        HeadB = Chem.SDMolSupplier(HeadB_sdf)[0]
        # anchor distances
        X1Y1_dist = []
        for linker in linkers:
            # NOTE(review): the return value is discarded. RDKit's AddHs
            # returns a new molecule, so this call presumably leaves `linker`
            # unchanged -- confirm whether hydrogens were meant to be kept.
            Chem.AddHs(linker)
            amapA = MCS_AtomMap(HeadA, linker)
            amapB = MCS_AtomMap(HeadB, linker)
            # Each map entry is used as (head atom index, linker atom index);
            # take the linker atom paired with each head anchor.
            anchors = [
                [item[1] for item in amapA if item[0] == Anchors[0]][0],
                [item[1] for item in amapB if item[0] == Anchors[1]][0],
            ]
            accepted = 0
            seed = 0
            # NOTE(review): this loop never ends if embedding fails for
            # every seed.
            while accepted < n:
                seed += 1
                if Chem.rdDistGeom.EmbedMolecule(linker, randomSeed=seed, useBasicKnowledge=True, maxAttempts=10) == -1:
                    continue
                X1Y1_dist.append(rdMolTransforms.GetBondLength(linker.GetConformer(), anchors[0], anchors[1]))
                writer.write(linker)
                accepted += 1
    finally:
        writer.close()

    shortest = min(X1Y1_dist)
    longest = max(X1Y1_dist)
    # Histogram range is widened to whole numbers; two bins per unit length.
    hist = np.histogram(
        np.array(X1Y1_dist),
        range=(math.floor(shortest), math.ceil(longest)),
        bins=2 * int(math.ceil(longest - math.floor(shortest))),
    )
    with open(output_hist, 'w') as f:
        f.write("range is: " + str([shortest, longest]) + '\n')
        # hist[1] holds one more edge than hist[0] has counts; zip pairs
        # each count with its left bin edge.
        for count, left_edge in zip(hist[0], hist[1]):
            f.write(str(count) + '\t' + str(left_edge) + '\n')
    return (shortest, longest)
def check_bonds_are_plausible_and_atom_overlap(aemol):
    """Return False as soon as an implausible bond or an atom clash is found.

    ``aemol.to_rdkit()`` is called first and ``aemol.rdmol`` is then
    inspected. Every ordered pair of distinct atoms is checked:

    * bonded pairs must have a length within [0.90, 2.60];
    * non-bonded pairs must be at least 1.4 apart.

    Returns True when no pair violates these limits.
    """
    aemol.to_rdkit()
    rdmol = aemol.rdmol
    atom_total = rdmol.GetNumAtoms()

    for first in range(atom_total):
        for second in range(atom_total):
            if first == second:
                continue

            if rdmol.GetBondBetweenAtoms(first, second) is not None:
                length = Chem.GetBondLength(rdmol.GetConformer(), first, second)
                if not (0.90 <= length <= 2.60):
                    return False
                continue

            # Non-bonded pair: compare the Cartesian separation to the
            # overlap threshold.
            conformer = rdmol.GetConformer()
            p_first = conformer.GetAtomPosition(first)
            p_second = conformer.GetAtomPosition(second)
            separation = np.linalg.norm(
                np.array([p_first.x, p_first.y, p_first.z])
                - np.array([p_second.x, p_second.y, p_second.z]))
            if separation < 1.4:
                return False

    return True
def GetRingSubstituentPosition(mol, ring, ring_substituent):
    """
    Describe the direction of a ring-substituent bond by two angles.

    mol: rdMol (its default conformer is read)
    ring: list (ring atom indices)
    ring_substituent: tuple (ring atom index, substituent atom index)

    Return:
    alpha: float, arccos of the projection of the unit bond vector onto the
           ring normal returned by GetNormal; np.arccos yields [0, pi]
    beta: float, arctan2(-s.v, s.u) where u is the in-plane direction of the
          ring atom (relative to the ring center) and v = n x u;
          np.arctan2 yields values in [-pi, pi]
    """
    molconformer = mol.GetConformer()
    ring_coord = [list(molconformer.GetAtomPosition(node)) for node in ring]
    substituent_coord = [list(molconformer.GetAtomPosition(node)) for node in ring_substituent]

    # Work in a frame whose origin is the mean position of the ring atoms.
    ringcenter = np.mean(ring_coord, axis=0)
    ring_coord_ = np.array(ring_coord) - ringcenter
    substituent_coord_ = np.array(substituent_coord) - ringcenter

    # Unit vector from the ring atom to the substituent (shape (1, 3)).
    S = np.diff(substituent_coord_, axis=0)
    s = S / np.linalg.norm(S)
    n = GetNormal(ring_coord_)
    # .item() replaces np.asscalar, which recent NumPy releases no longer
    # provide; both extract the single element as a Python scalar.
    alpha = np.arccos(fixzero(np.dot(s, n))).item()

    # Build the in-plane axes: u is the ring-atom position with its normal
    # component removed, v completes the frame.
    R = substituent_coord_[0]
    U = R - np.dot(R, n) * n
    u = U / np.linalg.norm(U)
    v = np.cross(n, u)
    su = fixzero(np.dot(s, u))
    sv = fixzero(np.dot(s, v))
    beta = np.arctan2(-sv, su).item()
    return alpha, beta
def testUFFDistanceConstraints(self):
    """Minimize ``self.molB`` under UFF with a 1-3 distance constraint.

    Two passes run on the same molecule, each with a freshly built force
    field: first with arguments ``(False, 2.0, 2.0)``, then with
    ``(True, -0.2, 0.2)``. After each minimization (which must return 0)
    the 1-3 distance must exceed 1.99 and 1.79 respectively.
    """
    m = Chem.MolFromMolBlock(self.molB, True, False)

    # Each row: the three constraint arguments that follow the atom indices
    # (presumably relative flag, min length, max length -- confirm against
    # the RDKit signature), then the bound the final distance must exceed.
    passes = (
        (False, 2.0, 2.0, 1.99),
        (True, -0.2, 0.2, 1.79),
    )
    for flag, first_len, second_len, bound in passes:
        field = ChemicalForceFields.UFFGetMoleculeForceField(m)
        self.assertTrue(field)
        field.UFFAddDistanceConstraint(1, 3, flag, first_len, second_len, 1.0e5)
        status = field.Minimize()
        self.assertTrue(status == 0)
        separation = rdMolTransforms.GetBondLength(m.GetConformer(), 1, 3)
        self.assertTrue(separation > bound)
def test_bond_lengths(acetone):
    """
    Check that every bond length reported by ``measure_bonds`` agrees, within
    the default ``pytest.approx`` tolerance, with the distance RDKit computes
    for the same atom pair on the molecule's conformer.
    """
    measured = acetone.measure_bonds()
    rd_mol = acetone.to_rdkit()
    for atom_pair, measured_length in measured.items():
        reference = rdMolTransforms.GetBondLength(rd_mol.GetConformer(), *atom_pair)
        assert pytest.approx(reference) == measured_length
def get_bonds(f, atid=None):
    """Measure atom-pair distances in the conformer of a mol file.

    Args:
        f: path to a mol file; it is read with ``removeHs=False`` and
            ``sanitize=False``.
        atid: iterable of atom-index pairs. When ``None``, the pairs are
            obtained from ``enumerateBonds(f)``.

    Returns:
        ``(lengths, atid)`` where ``lengths`` is a NumPy array with one
        distance per pair, in the order of ``atid``.
    """
    # rdkit
    mol = Chem.MolFromMolFile(f, removeHs=False, sanitize=False)
    conformer = mol.GetConformer()
    # Identity test: `atid == None` would compare element-wise (and then
    # fail in the `if`) when a NumPy array is passed.
    if atid is None:
        atid = enumerateBonds(f)
    lengths = [rdt.GetBondLength(conformer, pair[0], pair[1]) for pair in atid]
    return np.array(lengths), atid
def testMMFFDistanceConstraints(self):
    """Minimize ``self.molB`` under MMFF with a 1-3 distance constraint.

    Two passes share one set of MMFF molecule properties, each with a
    freshly built force field: first with constraint arguments
    ``(False, 2.0, 2.0)``, then ``(True, -0.2, 0.2)``. Each minimization
    must return 0 and leave the 1-3 distance above 1.99 and 1.79
    respectively.
    """
    m = Chem.MolFromMolBlock(self.molB, True, False)
    mp = ChemicalForceFields.MMFFGetMoleculeProperties(m)

    # assert* methods replace the failUnless alias, which no longer exists
    # in current unittest releases.
    ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp)
    self.assertTrue(ff)
    ff.MMFFAddDistanceConstraint(1, 3, False, 2.0, 2.0, 1.0e5)
    r = ff.Minimize()
    self.assertEqual(r, 0)
    conf = m.GetConformer()
    dist = rdMolTransforms.GetBondLength(conf, 1, 3)
    self.assertGreater(dist, 1.99)

    # Second pass starts from the geometry left by the first minimization.
    ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp)
    self.assertTrue(ff)
    ff.MMFFAddDistanceConstraint(1, 3, True, -0.2, 0.2, 1.0e5)
    r = ff.Minimize()
    self.assertEqual(r, 0)
    conf = m.GetConformer()
    dist = rdMolTransforms.GetBondLength(conf, 1, 3)
    self.assertGreater(dist, 1.79)
def molQuality(mol):
    """Return the smallest distance between two atoms that share no bond.

    Every unordered atom pair (i < j) of ``mol`` is visited; pairs for which
    ``GetBondBetweenAtoms`` returns a bond are skipped. Distances are read
    from the molecule's default conformer.

    Returns:
        The minimum non-bonded distance, or the sentinel ``1e100`` when the
        molecule has no non-bonded pair.
    """
    natom = mol.GetNumAtoms()
    conf = mol.GetConformer()
    minimum = 1e100
    for i in range(natom - 1):
        for j in range(i + 1, natom):
            if mol.GetBondBetweenAtoms(i, j) is not None:
                continue
            dist = rdMolTransforms.GetBondLength(conf, i, j)
            if dist < minimum:
                minimum = dist
    return minimum
def GetRingBondLength(mol, ringpath):
    """
    Measure the distance between consecutive atoms along a ring path.

    Input:

    mol: rdmol (its default conformer is read)

    ringpath: list of atom indices in ring order

    Return:

    bondlength: list with one distance per consecutive pair; the last entry
    closes the ring (last atom back to the first)
    """
    size = len(ringpath)
    # Pair each atom with its successor; the modulo wraps the final atom
    # around to the first one.
    neighbours = []
    for position in range(size):
        neighbours.append((ringpath[position], ringpath[(position + 1) % size]))
    conformer = mol.GetConformer()
    return [rdMolTransforms.GetBondLength(conformer, start, end)
            for start, end in neighbours]
def SetRingSubstituentPosition(mol, ring, ring_substituent, alpha, beta):
    """
    Compute a new position for a ring substituent from two angles.
    The current ring-atom-to-substituent distance is reused as the bond length.

    mol: rdmol (its default conformer is read)
    ring: list (ring atom indices)
    ring_substituent: list (ring atom index, substituent index)
    alpha: float (0, np.pi)
    beta: float (0,2*np.pi )

    Return:
    coordinate: list

    NOTE(review): the body visible here ends after computing
    ``ring_substituent_pos`` and contains no ``return`` statement; the
    definition may continue beyond what is shown. Confirm what is returned.
    """
    # NOTE(review): `coordinate` is not used again in the visible body.
    coordinate = []
    molconformer = mol.GetConformer()
    bondlength = rdMolTransforms.GetBondLength(molconformer, *ring_substituent)
    ring_coord = [list(molconformer.GetAtomPosition(node)) for node in ring]
    substituent_coord = [
        list(molconformer.GetAtomPosition(node)) for node in ring_substituent
    ]
    # Ring coordinates are re-expressed relative to the mean ring position.
    ringcenter = np.mean(ring_coord, axis=0)
    ring_coord_ = np.array(ring_coord) - ringcenter
    substituent_coord_ = np.array(substituent_coord) - ringcenter
    # NOTE(review): np.diff is called without `axis` here, whereas
    # GetRingSubstituentPosition passes axis=0 on the centered coordinates.
    # `S`, `s` and `substituent_coord_` are not used again in the visible body.
    S = np.diff(substituent_coord)  # vector from ring atom to substituent
    s = S / np.linalg.norm(S)
    n = GetNormal(ring_coord_)  # Normal vector
    # NOTE(review): R is taken from the uncentered coordinates, whereas
    # GetRingSubstituentPosition uses the centered ones -- confirm intent.
    R = np.array(substituent_coord[0])  # ring atom
    # u: R with its component along n removed, normalized; v = n x u.
    U = R - np.dot(R, n) * n
    u = U / np.linalg.norm(U)
    v = np.cross(n, u)
    # Components of the bond vector in the (u, v, n) frame, built from the
    # two angles with beta negated.
    x = fixzero(bondlength * np.sin(alpha) * np.cos(-beta))
    y = fixzero(bondlength * np.sin(alpha) * np.sin(-beta))
    z = fixzero(bondlength * np.cos(alpha))
    b = np.array([x, y, z])
    # Columns of T are u, v and n; T maps frame components to Cartesian.
    T = np.array([u, v, n]).T
    # Offset by the ring atom's (uncentered) position.
    ring_substituent_pos = np.matmul(T, b) + np.array(substituent_coord)[0]
def SampleDist(Heads, Anchors, Linkers, n=200, output_hist="initial_distances.hist", hist_threshold=0.75, min_margin=2, homo_protac=False):
    """Sample constrained linker embeddings and derive an anchor-distance range.

    For b = 1, 2, 3, ... the two heads are randomly rotated, head B is
    passed to ``translateMol`` with ``Point3D(b, 0, 0)``, and the linker is
    embedded with a random half of the head atoms used as coordinate
    constraints. An embedding counts for ``b`` when its rounded anchor-anchor
    distance equals ``b``. The scan stops at the first ``b >= 10`` with no hit.

    Args:
        Heads: pair ``[HeadA_sdf, HeadB_sdf]`` of SDF paths; the first
            molecule of each file is used.
        Anchors: pair of atom indices, one per head.
        Linkers: path to a text file; the first whitespace-separated token
            of its first line is parsed as a SMILES string.
        n: number of embedding attempts per value of ``b``.
        output_hist: path of the tab-separated ``b``/count histogram.
        hist_threshold: fraction of the largest count a bin needs to be
            treated as a "high" bin.
        min_margin: minimum half-width of the returned range around the
            count-weighted mean of ``b``.
        homo_protac: selects the substructure-match branch instead of MCS.

    Returns:
        ``(None, None)`` when either MCS pattern has no match in the linker;
        ``(0, 0)`` when no embedding was counted; otherwise
        ``(min(lowest high bin, mean - min_margin),
        max(highest high bin, mean + min_margin))``.

    Side effects: reseeds the global ``random`` module with 0, prints
    diagnostic SMILES and matches, and writes "random_sampling.sdf" and
    ``output_hist``.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-mangled source; confirm block boundaries against the original.
    NOTE(review): the SDWriter is not closed anywhere in the visible body.
    """
    writer = Chem.SDWriter("random_sampling.sdf")
    random.seed(0)
    [HeadA_sdf, HeadB_sdf] = Heads
    #linkers
    with open(Linkers, 'r') as f:
        linkers = [Chem.MolFromSmiles(f.readline().split()[0])]
    #loading the heads sdf files
    HeadA = Chem.SDMolSupplier(HeadA_sdf)[0]
    HeadB = Chem.SDMolSupplier(HeadB_sdf)[0]
    origin = Point3D(0, 0, 0)
    # Both heads are passed to translateMol with the origin and their own
    # anchor position (presumably moving the anchor onto the origin --
    # confirm against translateMol).
    anchor_a = HeadA.GetConformer().GetAtomPosition(Anchors[0])
    translateMol(HeadA, origin, anchor_a)
    anchor_b = HeadB.GetConformer().GetAtomPosition(Anchors[1])
    translateMol(HeadB, origin, anchor_b)
    for linker in linkers:
        #homo protacs are protacs with the same binder twice, causing self degradation of an E3 ligase
        if homo_protac:
            # NOTE(review): this branch assigns head_A/head_B only; the names
            # head_A_list, head_B_list, head_A_inner and head_B_inner used
            # below are not assigned on this path in the visible code.
            head_A = linker.GetSubstructMatches(HeadA)[0]
            head_B = linker.GetSubstructMatches(HeadB)[1]
        else:
            # Maximum common substructure between the linker and each head,
            # re-parsed as a SMARTS query.
            mcs_A = rdFMCS.FindMCS([linker, HeadA])
            mcs_patt_A = Chem.MolFromSmarts(mcs_A.smartsString)
            mcs_B = rdFMCS.FindMCS([linker, HeadB])
            mcs_patt_B = Chem.MolFromSmarts(mcs_B.smartsString)
            #head_A_list = linker.GetSubstructMatches(HeadA, uniquify=False)
            # All (non-uniquified) matches in the linker, one match in the head.
            head_A_list = linker.GetSubstructMatches(mcs_patt_A, uniquify=False)
            head_A_inner = HeadA.GetSubstructMatch(mcs_patt_A)
            #head_B_list = linker.GetSubstructMatches(HeadB, uniquify=False)
            head_B_list = linker.GetSubstructMatches(mcs_patt_B, uniquify=False)
            head_B_inner = HeadB.GetSubstructMatch(mcs_patt_B)
            print(Chem.MolToSmiles(linker))
            print(Chem.MolToSmiles(HeadB))
            print(head_B_list)
            if len(head_A_list) == 0 or len(head_B_list) == 0:
                return (None, None)
        # histogram maps each scanned distance b to its number of hits.
        histogram = {}
        seed = 0
        b = 1
        while True:
            b_counter = 0
            for i in range(n):
                head_A = random.choice(head_A_list)
                head_B = random.choice(head_B_list)
                # The seed advances on every attempt, including failed ones.
                seed += 1
                # Work on copies so the rotations do not accumulate on the
                # loaded heads.
                NewA = copy.deepcopy(HeadA)
                NewB = copy.deepcopy(HeadB)
                randomRotateMol(NewA)
                randomRotateMol(NewB)
                translateMol(NewB, Point3D(b, 0, 0), origin)
                #the constraints for the conformation generation using the two randomized heads
                # Maps linker atom index -> position of the matching head atom.
                cmap = {
                    head_A[i]: NewA.GetConformer().GetAtomPosition(head_A_inner[i])
                    for i in range(len(head_A))
                }
                cmap.update({
                    head_B[i]: NewB.GetConformer().GetAtomPosition(head_B_inner[i])
                    for i in range(len(head_B))
                })
                #only half of the atoms are required to make the constrained embedding
                #this is done because using all the atoms sometimes makes it impossible
                #to find solutions, the half is chosen randomly for each generation
                cmap_tag = random.sample(list(cmap), int(len(cmap) / 2))
                cmap_tag = {ctag: cmap[ctag] for ctag in cmap_tag}
                # A return value of -1 is treated as a failed embedding.
                if AllChem.EmbedMolecule(linker, coordMap=cmap_tag, randomSeed=seed, useBasicKnowledge=True, maxAttempts=1) == -1:
                    continue
                # Keep the conformer only when its anchor-anchor distance
                # rounds to the distance currently being scanned.
                if int(
                        round(
                            rdMolTransforms.GetBondLength(
                                linker.GetConformer(), head_A[Anchors[0]],
                                head_B[Anchors[1]]))) == b:
                    writer.write(linker)
                    b_counter += 1
            histogram[b] = b_counter
            # Stop at the first distance of 10 or more that yields no hit.
            if b >= 10 and b_counter == 0:
                break
            b += 1
        with open(output_hist, 'w') as f:
            for h in histogram:
                f.write(str(h) + "\t" + str(histogram[h]) + '\n')
        max_value = max([histogram[i] for i in histogram])
        # Accumulate the count-weighted sum of b and the total count.
        sum_mul = 0
        sum_his = 0
        for i in histogram:
            sum_mul += i * histogram[i]
            sum_his += histogram[i]
        if sum_his == 0:
            return (0, 0)
        else:
            avg_index = 1.0 * sum_mul / sum_his
        # Bins reaching the threshold fraction of the tallest bin.
        threshold = max_value * hist_threshold
        high_values = [i for i in histogram if histogram[i] >= threshold]
        return (min(min(high_values), avg_index - min_margin), max(max(high_values), avg_index + min_margin))
def exp_rules_output(mol, args, log):
    """Apply the 'Ir_bidentate_x3' geometric filter to one conformer.

    Args:
        mol: RDKit molecule; conformer 0 is used for distances and angles.
            Atoms whose symbol is in ``args.metal`` get their atomic number
            overwritten with the symbol's index in ``possible_atoms``.
        args: options object; the visible code reads ``exp_rules``,
            ``metal``, ``complex_coord[0]`` and ``angle_off``.
        log: not used in the visible body.

    Returns:
        ``passing``: False when a metal-neighbour distance exceeds its limit
        (2.60 for 'P' neighbours, 2.25 otherwise), when the number of metal
        neighbours differs from ``args.complex_coord[0]``, or when the
        N-metal-N angle test for 3 or 2 detected ligands fails; True
        otherwise.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-mangled source; confirm block boundaries against the original.
    NOTE(review): ``passing`` is only assigned inside the
    ``'Ir_bidentate_x3'`` branch, and ``atomic_number``/``metal_idx`` only
    when a metal atom is found.
    """
    if args.exp_rules == 'Ir_bidentate_x3':
        passing = True
        ligand_links = []
        atom_indexes = []
        for atom in mol.GetAtoms():
            # Finds the Ir atom and gets the atom types and indexes of all its neighbours
            if atom.GetSymbol() in args.metal:
                atomic_number = possible_atoms.index(atom.GetSymbol())
                atom.SetAtomicNum(atomic_number)
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == atomic_number:
                metal_idx = atom.GetIdx()
                for x in atom.GetNeighbors():
                    ligand_links.append(x.GetSymbol())
                    atom_indexes.append(x.GetIdx())
        # I need to get the only 3D conformer generated in that mol object for rdMolTransforms
        mol_conf = mol.GetConformer(0)
        # This part will identify the pairs of C and N atoms that are part of the same Ph_Py ligand.
        # The shape of the atom pairs is '[[C1_ATOM_NUMBER, N1_ATOM_NUMBER],[C2, N2],...]'.
        # This information is required for the subsequent filtering process based on angles
        if len(atom_indexes) == args.complex_coord[0]:
            ligand_atoms = []
            for i, _ in enumerate(atom_indexes):
                # This is a filter that excludes molecules that fell apart during DFT geometry
                # optimization (i.e. a N atom from one of the ligands separated from Ir). The
                # max distance allowed can be tuned in length_filter
                bond_length = rdMolTransforms.GetBondLength(
                    mol_conf, metal_idx, atom_indexes[i])
                if ligand_links[i] == 'P':
                    length_filter = 2.60
                else:
                    length_filter = 2.25
                if bond_length > length_filter:
                    passing = False
                    break
                for j, _ in enumerate(atom_indexes):
                    # Avoid combinations of the same atom with itself
                    if atom_indexes[i] != atom_indexes[j]:
                        # We know that the ligands never have 2 carbon atoms bonding the Ir atom. We
                        # only use atom_indexes[i] for C atoms, and atom_indexes[j] for the potential
                        # N atoms that are part of the same Ph_Py ligand
                        if ligand_links[i] == 'C':
                            # This part detects the Ir-C bond and breaks it, breaking the Ph_Py ring
                            bond = mol.GetBondBetweenAtoms(
                                atom_indexes[i], metal_idx)
                            new_mol = Chem.FragmentOnBonds(
                                mol, [bond.GetIdx()],
                                addDummies=True,
                                dummyLabels=[(atom_indexes[i], metal_idx)])
                            if new_mol.GetAtomWithIdx(
                                    atom_indexes[i]).IsInRingSize(5):
                                five_mem = True
                            else:
                                five_mem = False
                            # identify whether or not the initial 5-membered ring formed between [-Ir-C-C-C-N-] is broken when we break the Ir-C bond. This works
                            # because Ph_Py units bind Ir in the same way always, through 1 C and 1 N that are in the same position, forming a 5-membered ring.
                            # If this ring is broken, atom_indexes[j] will not be part of a 5-membered ring (atom.IsInRingSize(5) == False) which means that
                            # this atom was initially inside the same ligand as the parent C of atom_indexes[i])
                            if not five_mem:
                                if not new_mol.GetAtomWithIdx(
                                        atom_indexes[j]).IsInRingSize(5):
                                    bond_2 = mol.GetBondBetweenAtoms(
                                        atom_indexes[j], metal_idx)
                                    new_mol_2 = Chem.FragmentOnBonds(
                                        mol, [bond_2.GetIdx()],
                                        addDummies=True,
                                        dummyLabels=[(atom_indexes[j], metal_idx)])
                                    #doing backwards as well eg. Ir N bond
                                    if not new_mol_2.GetAtomWithIdx(
                                            atom_indexes[i]).IsInRingSize(5):
                                        ligand_atoms.append(
                                            [atom_indexes[i], atom_indexes[j]])
                                        break
                            else:
                                # The C atom is still in a 5-membered ring after the cut:
                                # accept j only if it was in a 5-membered ring before the
                                # cut and is not in one afterwards.
                                if not new_mol.GetAtomWithIdx(
                                        atom_indexes[j]).IsInRingSize(5):
                                    if mol.GetAtomWithIdx(
                                            atom_indexes[j]).IsInRingSize(5):
                                        ligand_atoms.append(
                                            [atom_indexes[i], atom_indexes[j]])
                                        break
            if passing:
                # This stop variable and the breaks inside the inner loops will break the nested loop if there
                # is one angle that does not meet the criteria for valid conformers
                # NOTE(review): `stop` is never set to True in the visible code, so the
                # `if not stop` guard below never skips an iteration.
                stop = False
                # For complexes with 3 Ph_Py ligands:
                if len(ligand_atoms) == 3:
                    for i, _ in enumerate(ligand_atoms):
                        if not stop:
                            for j, _ in enumerate(ligand_atoms):
                                # the i<=j part avoids repeating atoms, the i != j part avoid angles
                                # containing the same number twice (i.e. 4-16-4, this angle will fail)
                                if i <= j and i != j:
                                    # Calculate the angle between 2 N atoms from different Ph_Py ligands.
                                    # When there are 3 Ph_Py ligands, no 2 N atoms must be in 180 degrees
                                    angle = rdMolTransforms.GetAngleDeg(
                                        mol_conf, ligand_atoms[i][1], metal_idx,
                                        ligand_atoms[j][1])
                                    if (180 - args.angle_off) <= angle <= (
                                            180 + args.angle_off):
                                        passing = False
                                        break
                # For complexes with 2 Ph_Py ligands + 1 ligand that is not Ph_Py
                if len(ligand_atoms) == 2:
                    # Since there are only 2 N atoms, we do not need to include a nested loop
                    angle = rdMolTransforms.GetAngleDeg(
                        mol_conf, ligand_atoms[0][1], metal_idx,
                        ligand_atoms[1][1])
                    # Calculate the angle between 2 N atoms from different Ph_Py ligands.
                    # When there are 2 Ph_Py ligands, the 2 N atoms from the 2 Ph_Py ligands must be in 180 degrees
                    if (180 - args.angle_off) <= angle <= (180 + args.angle_off):
                        pass
                    else:
                        passing = False
        # it filters off molecules that the SDF only detects 5 Ir neighbours
        else:
            passing = False
    return passing