def generate_conformers(uid, lig_file, init='generate_conformers_init'):
    """Generate MMFF94-optimized random conformers of a ligand as a multiframe PDB.

    The initial coordinates of the molecule are discarded: all hydrogens are
    removed and re-added, random conformers are embedded, each conformer is
    optimized with the MMFF94 force field, and all conformers are written as
    frames of a single PDB file.

    :param uid: string (identifier of the ligand; used as the output file stem)
    :param lig_file: string (path to the ligand file in the PDB format to read)
    :param init: string (name of the init object; it must provide
        ``conformers_path``, ``conformers_dir``, ``num_conformers`` and ``out_H``)
    :return: nested list ``[[uid, path]]`` where ``path`` is
        ``init.conformers_dir`` joined with ``uid + ".pdb"``. The file itself
        is written under ``init.conformers_path`` (presumably the absolute
        counterpart of ``conformers_dir`` -- confirm against the init function).
    """
    # TODO: test if shape is forgotten (give an option to optimize only?)
    # NOTE(review): `init` is resolved with eval(); only pass trusted names.
    init = eval(init)
    conf_outpath = os.path.join(init.conformers_path, uid + ".pdb")

    mol = Chem.MolFromPDBFile(lig_file)
    mol = Chem.RemoveHs(mol)
    mol = Chem.AddHs(mol)
    conf_ids = AllChem.EmbedMultipleConfs(mol, clearConfs=True, numConfs=init.num_conformers)

    # Optimize every conformer on the molecule that still carries hydrogens.
    # Stripping hydrogens inside this loop would make every conformer after
    # the first one be optimized without them.
    for cid in conf_ids:
        AllChem.MMFFOptimizeMolecule(mol, confId=cid)

    # Strip hydrogens once, only for output, when they are not requested.
    out_mol = mol if init.out_H else Chem.RemoveHs(mol)

    pdb_writer = PDBWriter(conf_outpath)
    try:
        for cid in conf_ids:
            pdb_writer.write(out_mol, confId=cid)
    finally:
        # Close the writer even if writing a frame fails.
        pdb_writer.close()

    return [[uid, os.path.join(init.conformers_dir, uid + ".pdb")]]
def write_frame(host_coords, host_mol, guest_coords, guest_mol, guest_name, outdir, step, stage):
    """Write one frame: host coordinates to a PDB file, guest coordinates to an SDF file.

    Both files are placed in ``<outdir>/<guest_name>/`` (created if missing) and
    named ``<guest_name>_<stage>_<step>_host.pdb`` and
    ``<guest_name>_<stage>_<step>_guest.sdf``.

    For each molecule, the conformer returned by ``GetConformer()`` receives
    the given coordinates, is added with ``AddConformer``, written using the
    returned conformer id, and then removed with ``RemoveConformer``.
    The guest's ``_Name`` property is set to ``<guest_name>_<stage>_<step>_guest``.

    NOTE(review): ``GetConformer()`` appears to hand back the molecule's own
    conformer, so its positions would be overwritten in place -- confirm.
    """
    frame_dir = os.path.join(outdir, guest_name)
    if not os.path.exists(frame_dir):
        os.mkdir(frame_dir)

    def _load_positions(conformer, coords, atom_count):
        # Copy the first `atom_count` coordinate triples into the conformer.
        for atom_idx in range(atom_count):
            x, y, z = coords[atom_idx]
            conformer.SetAtomPosition(atom_idx, Point3D(x, y, z))

    # --- host -> PDB ---
    host_conformer = host_mol.GetConformer()
    _load_positions(host_conformer, host_coords, host_mol.GetNumAtoms())
    host_conf_id = host_mol.AddConformer(host_conformer)
    host_path = os.path.join(
        outdir,
        guest_name,
        f"{guest_name}_{stage}_{step}_host.pdb",
    )
    host_writer = PDBWriter(host_path)
    host_writer.write(host_mol, host_conf_id)
    host_writer.close()
    host_mol.RemoveConformer(host_conf_id)

    # --- guest -> SDF ---
    guest_conformer = guest_mol.GetConformer()
    _load_positions(guest_conformer, guest_coords, guest_mol.GetNumAtoms())
    guest_conf_id = guest_mol.AddConformer(guest_conformer)
    guest_mol.SetProp("_Name", f"{guest_name}_{stage}_{step}_guest")
    guest_path = os.path.join(
        outdir,
        guest_name,
        f"{guest_name}_{stage}_{step}_guest.sdf",
    )
    guest_writer = SDWriter(guest_path)
    guest_writer.write(guest_mol, guest_conf_id)
    guest_writer.close()
    guest_mol.RemoveConformer(guest_conf_id)
def generate_conformers(lig_file, init='generate_conformers_init'):  # option pdb_mol
    """Convert a PDB crystal ligand to SDF and write its conformers as a PDB.

    Steps:
      > Reads the ligand from ``lig_file`` (PDB format).
      > Saves it as an SDF file under ``init.mol_path`` for future use.
      > Adds hydrogens, embeds ``init.num_conformers`` conformers and
        optimizes each with MMFF.
      > Writes every conformer, hydrogens removed, as a frame of a PDB file
        under ``init.out_lig_path``.

    :param lig_file: string, path to the ligand PDB file; the ligand name is
        the part of the path after ``init.lig_path`` and one separator.
    :param init: string, name of the init object (resolved with ``eval``).
    :return: ``[[pdb_file, mol_file, num_atoms]]`` where ``num_atoms`` is the
        atom count of the hydrogen-stripped molecule.
    """
    # NOTE(review): `init` is resolved with eval(); only pass trusted names.
    init = eval(init)
    lig_name = lig_file[len(init.lig_path) + 1:]
    pdb_file = os.path.join(init.out_lig_path, lig_name)
    mol_file = os.path.join(init.mol_path, lig_name).replace('.pdb', '.sdf')

    # use rdkit to get a mol object from the PDB and keep an SDF copy
    mol = Chem.MolFromPDBFile(lig_file)
    writer = SDWriter(mol_file)
    try:
        writer.write(mol)
    finally:
        # The SD writer must be closed so the record is flushed to disk.
        writer.close()

    # generate and optimize conformers on the hydrogen-containing molecule
    mol2 = Chem.AddHs(mol)
    conf_ids = AllChem.EmbedMultipleConfs(mol2, init.num_conformers)
    for cid in conf_ids:
        AllChem.MMFFOptimizeMolecule(mol2, confId=cid)

    # Strip hydrogens once for output; computing the atom count here keeps it
    # defined even when embedding produced no conformers.
    mol = Chem.RemoveHs(mol2)
    num_atoms = mol.GetNumAtoms()

    pdb_writer = PDBWriter(pdb_file)
    try:
        for cid in conf_ids:
            # Pass confId explicitly; without it the default conformer would
            # be written on every iteration.
            pdb_writer.write(mol, confId=cid)
    finally:
        pdb_writer.close()

    print('Generated conformers for one ligand')
    return [[pdb_file, mol_file, num_atoms]]
def get_decoys(pdb_file, mol_file, num_atoms, init='get_decoys_init'):
    """Select decoy ligands for one binding ligand and write their conformers.

    Candidates from ``init.all_mols`` are visited in random order. A candidate
    is skipped when it is the ligand itself (same mol file) or when its atom
    count differs from ``num_atoms`` by more than ``init.max_atom_dif``. It is
    accepted as a decoy when ``MCS.FindMCS`` reports ``numAtoms == -1`` for
    the pair with ``minNumAtoms=init.max_substruct``. For each decoy,
    ``init.num_conformers`` conformers are embedded, MMFF-optimized and
    written (hydrogens removed) to a PDB file whose path is ``pdb_file`` with
    ``/binding_ligands/`` replaced by ``/decoy_ligands/`` and the decoy's
    running index inserted before ``.pdb``.

    :param pdb_file: string, path of the binding ligand's conformer PDB file.
    :param mol_file: string, path of the binding ligand's SDF file.
    :param num_atoms: int, atom count of the binding ligand.
    :param init: string, name of the init object (resolved with ``eval``).
    :return: list of ``[init.all_pdb_files[i], decoy_file]`` pairs, at most
        ``init.max_num_decoys`` long.
    """
    # NOTE(review): `init` is resolved with eval(); only pass trusted names.
    init = eval(init)
    reader = SDMolSupplier(mol_file)
    mol = reader[0]

    output = []
    # list(...) is required for shuffling on Python 3, where range() is not a
    # mutable sequence; on Python 2 it is a harmless copy.
    iterator = list(range(len(init.all_mols)))
    random.shuffle(iterator)

    for i in iterator:
        if (init.all_mol_files[i] == mol_file
                or abs(init.all_num_atoms[i] - num_atoms) > init.max_atom_dif):
            continue

        mcs = MCS.FindMCS([init.all_mols[i], mol],
                          minNumAtoms=init.max_substruct,
                          ringMatchesRingOnly=True,
                          completeRingsOnly=True,
                          timeout=1)
        if mcs.numAtoms != -1:
            # A sufficiently large common substructure exists: not a decoy.
            continue

        # save the decoy's conformers as a PDB file in the decoys folder
        decoy_file = pdb_file.replace('/binding_ligands/', '/decoy_ligands/').replace(
            '.pdb', str(len(output)) + '.pdb')

        # generate and optimize the conformers with hydrogens present
        decoy2 = Chem.AddHs(init.all_mols[i])
        conf_ids = AllChem.EmbedMultipleConfs(decoy2, init.num_conformers)
        for cid in conf_ids:
            AllChem.MMFFOptimizeMolecule(decoy2, confId=cid)
        decoy = Chem.RemoveHs(decoy2)

        pdb_writer = PDBWriter(decoy_file)
        try:
            for cid in conf_ids:
                # Pass confId explicitly; without it the default conformer
                # would be written on every iteration.
                pdb_writer.write(decoy, confId=cid)
        finally:
            pdb_writer.close()

        output.append([init.all_pdb_files[i], decoy_file])
        if len(output) >= init.max_num_decoys:
            break

    print('Got the decoys for one ligand')
    return output
def docking(k):
    """Re-dock the ligand of one complex into its own protein with smina.

    The ligand is read from ``<mol_id>_ligand.sdf``; if that yields no
    molecule, the RCSB SDF (when present) and then the ``.mol2`` file are
    tried. Twenty conformers are embedded and UFF-optimized, the one with the
    lowest UFF energy is written (hydrogens removed) as a PDB file, ligand and
    protein are converted to PDBQT with ``obabel`` when those files do not
    exist yet, and ``smina`` is run with an autobox around the original
    ligand file.

    Relies on the module-level names ``pdbbind_dir``, ``rcsb_dir``,
    ``log_dir``, ``out_pdbqt_dir``, ``pdb_dir`` and ``pdbqt_dir``.

    :param k: string, id of the complex (also used as the molecule id).
    :return: None. Returns early when the output file already exists, when no
        ligand could be read, or when no conformer could be embedded.
    """
    # mol_id = k.split("/")[-1]
    mol_id = k
    protein = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdb")
    protein_pdbqt = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdbqt")
    ligand = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.sdf")
    ligand_mol2 = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.mol2")
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')
    ligand_rcsb = os.path.join(rcsb_dir, mol_id, f"{mol_id}.sdf")

    # Skip complexes that were already docked.
    if os.path.exists(out_name):
        return

    # Read the ligand, falling back to alternative files.
    # m = Chem.MolFromSmiles(smiles)
    m = SDMolSupplier(ligand)[0]
    if m is None and os.path.exists(ligand_rcsb):
        m = SDMolSupplier(ligand_rcsb)[0]
    if m is None:
        m = Chem.MolFromMol2File(ligand_mol2)
    if m is None:
        return
    Chem.SanitizeMol(m)

    # Adding hydrogen atoms to molecule and generating 3D conformers
    m = Chem.AddHs(m)
    cids = list(AllChem.EmbedMultipleConfs(
        m,
        numConfs=20,
    ))
    if not cids:
        # Embedding failed; without this guard min() below would raise.
        print("conformer generation failed!")
        return

    cenergy = []
    for conf in cids:
        AllChem.UFFOptimizeMolecule(m, confId=conf)
        cenergy.append(
            AllChem.UFFGetMoleculeForceField(m, confId=conf).CalcEnergy())
    # Map the position of the lowest energy back to its conformer id rather
    # than assuming list index and conformer id coincide.
    best_cid = cids[cenergy.index(min(cenergy))]

    m = Chem.RemoveHs(m)
    w = PDBWriter(pdb_name)
    try:
        w.write(m, best_cid)
    finally:
        w.close()

    # pdb to pdbqt (both of ligand and protein)
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    # Build the command from parts instead of relying on backslash
    # continuations inside a string literal.
    command = " ".join([
        "smina",
        f"-r {protein_pdbqt}",
        f"-l {pdbqt_name}",
        f"--autobox_ligand {ligand}",
        "--autobox_add 8",
        "--exhaustiveness 8",
        f"--log {log_name}",
        f"-o {out_name}",
        "--cpu 1",
        "--num_modes 100",
        "--seed 0",
    ])
    os.system(command)
def docking(k):
    """Cross-dock one molecule, built from SMILES, into a complex's protein.

    The molecule is built from ``smiles_dic[prev_mol_id]``. Twenty conformers
    are embedded and UFF-optimized, the one with the lowest UFF energy is
    written (hydrogens removed) as a PDB file, ligand and protein are
    converted to PDBQT with ``obabel`` when those files do not exist yet, and
    ``smina`` is run with an autobox around the complex's own ligand file.

    Relies on the module-level names ``smiles_dic``, ``pdbbind_dir``,
    ``rcsb_dir``, ``log_dir``, ``out_pdbqt_dir``, ``pdb_dir`` and
    ``pdbqt_dir``.

    :param k: pair ``(complex_id, prev_mol_id)``; output files are named
        ``<prev_mol_id>_<complex_id>``.
    :return: None. Returns early when the output file already exists, when the
        molecule cannot be built, or when no conformer could be embedded.
    """
    # id_dic -> STOCK id : complex_id
    # smiles_dic -> STOCK id : SMILES
    complex_id, prev_mol_id = k
    mol_id = f'{prev_mol_id}_{complex_id}'
    protein = os.path.join(pdbbind_dir, complex_id, f'{complex_id}_protein.pdb')
    protein_pdbqt = os.path.join(pdbbind_dir, complex_id, f'{complex_id}_protein.pdbqt')
    ligand = os.path.join(pdbbind_dir, complex_id, f'{complex_id}_ligand.sdf')
    ligand_mol2 = os.path.join(pdbbind_dir, complex_id, f'{complex_id}_ligand.mol2')
    ligand_rcsb = os.path.join(rcsb_dir, complex_id, f'{complex_id}.sdf')
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')

    # Skip pairs that were already docked.
    if os.path.exists(out_name):
        return

    # Generate the molecule from its SMILES. A missing key or a non-string
    # entry raises; an unparsable SMILES makes MolFromSmiles return None
    # instead of raising, so both cases are checked.
    try:
        m = Chem.MolFromSmiles(smiles_dic[prev_mol_id])
    except (KeyError, TypeError):
        m = None
    if m is None:
        print("molecule generation failed!")
        return
    Chem.SanitizeMol(m)

    # Adding hydrogen atoms to molecule and generating 3D conformers
    m = Chem.AddHs(m)
    cids = list(AllChem.EmbedMultipleConfs(
        m,
        numConfs=20,
    ))
    if not cids:
        # Embedding failed; without this guard min() below would raise.
        print("conformer generation failed!")
        return

    cenergy = []
    for conf in cids:
        AllChem.UFFOptimizeMolecule(m, confId=conf)
        cenergy.append(
            AllChem.UFFGetMoleculeForceField(m, confId=conf).CalcEnergy())
    # Map the position of the lowest energy back to its conformer id rather
    # than assuming list index and conformer id coincide.
    best_cid = cids[cenergy.index(min(cenergy))]

    m = Chem.RemoveHs(m)
    w = PDBWriter(pdb_name)
    try:
        w.write(m, best_cid)
    finally:
        w.close()

    # pdb to pdbqt (both of ligand and protein)
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    # Build the command from parts instead of relying on backslash
    # continuations inside a string literal.
    command = " ".join([
        "smina",
        f"-r {protein_pdbqt}",
        f"-l {pdbqt_name}",
        f"--autobox_ligand {ligand}",
        "--autobox_add 8",
        "--exhaustiveness 8",
        f"--log {log_name}",
        f"-o {out_name}",
        "--cpu 1",
        "--num_modes 9",
        "--seed 0",
    ])
    os.system(command)