def delete_shell(core_mol, del_mol, cut_off, in_out='in'):
    """
    Build a trimmed copy of del_mol by removing the residues that lie
    within (in_out='in') or beyond (in_out='out') the cutoff distance
    from core_mol.

    Parameters:
    -----------
    core_mol: OEMol molecule
        The reference molecule the neighbour search is run against
    del_mol: OEMol molecule
        The molecule to trim. It is copied first, so the passed object
        is left untouched
    cut_off: python float number
        The cutoff distance handed to the nearest neighbour search
    in_out: python string
        'in' removes the residues of del_mol owning at least one atom
        reported as a neighbour of core_mol; 'out' removes all the
        other atoms

    Return:
    -------
    reset_del: OEMol molecule
        A new copy of the trimmed molecule, created so that the atom
        indexes are reset after the deletion

    Raises:
    -------
    ValueError
        If in_out is neither 'in' nor 'out'
    """
    if in_out != 'in' and in_out != 'out':
        raise ValueError(
            "The passed in_out parameter is not recognized: {}".format(in_out))

    # Work on a private copy of the molecule to trim
    trimmed = oechem.OEMol(del_mol)

    # Start with every atom selected ...
    mask = oechem.OEBitVector(trimmed.GetMaxAtomIdx())
    mask.NegateBits()

    # ... then unselect, residue by residue, what is reported as close
    # to the core molecule
    neighbour_search = oechem.OENearestNbrs(trimmed, cut_off)
    for pair in neighbour_search.GetNbrs(core_mol):
        for res_atom in oechem.OEGetResidueAtoms(pair.GetBgn()):
            mask.SetBitOff(res_atom.GetIdx())

    # Here the mask flags the far atoms; flip it when the close ones
    # are the ones to remove
    if in_out == 'in':
        mask.NegateBits()

    selected = oechem.OEAtomIdxSelected(mask)
    for doomed in trimmed.GetAtoms(selected):
        trimmed.DeleteAtom(doomed)

    # A fresh copy is made to reset the atom indexes after the deletion
    reset_del = oechem.OEMol(trimmed)
    return reset_del
def strip_water_ions(in_system):
    """
    Return a copy of the input system without water residues and without
    single-atom residues (treated as mono atomic ions).

    Parameters:
    ----------
    in_system : oechem.OEMol
        The bio-molecular system to clean. It is copied, not modified

    Output:
    -------
    clean_system : oechem.OEMol
        The subset of the system made by the atoms that were kept
    """
    system = in_system.CreateCopy()

    # Mask of the atoms to keep: everything is selected at the beginning
    keep_mask = oechem.OEBitVector(system.GetMaxAtomIdx())
    keep_mask.NegateBits()

    hier_view = oechem.OEHierView(
        system,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)

    for chain in hier_view.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                is_water = (oechem.OEGetResidueIndex(hres.GetOEResidue())
                            == oechem.OEResidueIndex_HOH)
                # A residue made by just one atom is considered an ion
                natoms = sum(1 for _ in hres.GetAtoms())

                if not (is_water or natoms == 1):
                    continue

                # Unselect all the atoms of the water or ion residue
                for at in hres.GetAtoms():
                    keep_mask.SetBitOff(at.GetIdx())

    # Extract what is still selected in the mask
    kept = oechem.OEAtomIdxSelected(keep_mask)
    clean_system = oechem.OEMol()
    oechem.OESubsetMol(clean_system, system, kept)

    return clean_system
def check_shell(core_mol, check_mol, cutoff):
    """
    Tell whether check_mol has at least one atom reported by the nearest
    neighbour search as being within the cutoff distance from core_mol.

    Parameters:
    -----------
    core_mol: OEMol molecule
        The core molecule
    check_mol: OEMol molecule
        The molecule tested against the shell surrounding core_mol
    cutoff: python float number
        The threshold distance handed to the nearest neighbour search

    Return:
    -------
    in_out: python boolean
        True if at least one atom of check_mol has been flagged as a
        neighbour of core_mol, False otherwise
    """
    # One bit for each atom of the molecule to check, all off at first
    near_mask = oechem.OEBitVector(check_mol.GetMaxAtomIdx())

    # Switch on the bit of every atom found close to the core molecule
    neighbour_search = oechem.OENearestNbrs(check_mol, cutoff)
    for pair in neighbour_search.GetNbrs(core_mol):
        near_mask.SetBitOn(pair.GetBgn().GetIdx())

    flagged = oechem.OEAtomIdxSelected(near_mask)

    # The first flagged atom is enough to answer, any() stops right there
    return any(True for _ in check_mol.GetAtoms(flagged))
def around(dist, ls):
    """
    Select the atoms close to the current selection: every atom belonging
    to a residue that has at least one atom reported within the threshold
    distance from the selected atoms is returned, the selected atoms
    themselves excluded. The threshold distance is in Angstrom according
    to the original documentation.

    A selection can be:

    mask = '5.0 around ligand'

    NOTE(review): `system` is not a parameter of this function nor is it
    assigned here; presumably it is provided by an enclosing scope that
    is not visible from this block - confirm.

    Parameters:
    -----------
    dist: value accepted by float()
        The threshold distance
    ls: container of atom indexes
        The current selection, only probed with the `in` operator

    Return:
    -------
    atom_set_around: python set
        The indexes of the atoms around the selection
    """
    # Switch on the bits of the atoms that are part of the selection
    selection_mask = oechem.OEBitVector(system.GetMaxAtomIdx())
    for at in system.GetAtoms():
        idx = at.GetIdx()
        if idx in ls:
            selection_mask.SetBitOn(idx)

    # Molecule made by the selected atoms only
    selection_pred = oechem.OEAtomIdxSelected(selection_mask)
    molecules = oechem.OEMol()
    oechem.OESubsetMol(molecules, system, selection_pred)

    # Collect, residue by residue, what is close to the selection
    atom_set_around = set()
    neighbour_search = oechem.OENearestNbrs(system, float(dist))
    for pair in neighbour_search.GetNbrs(molecules):
        atom_set_around.update(
            res_atom.GetIdx()
            for res_atom in oechem.OEGetResidueAtoms(pair.GetBgn())
            if res_atom.GetIdx() not in ls)

    return atom_set_around
def DropLigandFromProtein(prot, lig):
    """Delete, in place, the atoms of prot sitting on the ligand
    coordinates and all the water atoms.

    An atom of prot is removed when the neighbour search reports it
    within 0.05 from a ligand atom and the two atoms have residues with
    the same model number. Atoms whose residue is recognized as HOH are
    removed as well. The function modifies prot and returns nothing.
    """
    same_spot_tolerance = 0.05

    # Atoms to delete: all the bits are off at the beginning
    doomed = oechem.OEBitVector(prot.GetMaxAtomIdx())

    # Flag the atoms overlapping the ligand; the index of the Bgn atom is
    # used against the prot mask, the End atom is the ligand counterpart
    overlap_search = oechem.OENearestNbrs(prot, same_spot_tolerance)
    for pair in overlap_search.GetNbrs(lig):
        bgn_atom = pair.GetBgn()
        bgn_model = oechem.OEAtomGetResidue(bgn_atom).GetModelNumber()
        end_model = oechem.OEAtomGetResidue(pair.GetEnd()).GetModelNumber()
        if bgn_model == end_model:
            doomed.SetBitOn(bgn_atom.GetIdx())

    # Flag the waters too
    for atom in prot.GetAtoms():
        residue = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(residue) == oechem.OEResidueIndex_HOH:
            doomed.SetBitOn(atom.GetIdx())

    flagged = oechem.OEAtomIdxSelected(doomed)
    for atom in prot.GetAtoms(flagged):
        prot.DeleteAtom(atom)
def oesolvate(solute, density=1.0, padding_distance=10.0,
              distance_between_atoms=2.5,
              solvents='tip3p',
              molar_fractions='1.0',
              geometry='box',
              close_solvent=True,
              salt='[Na+], [Cl-]', salt_concentration=0.0,
              neutralize_solute=True, verbose=False,
              return_components=False, **kargs):
    """
    This function solvates the passed solute in a cubic box or a sphere by using Packmol. Packmol
    creates an initial point for molecular dynamics simulations by packing molecule in defined regions
    of space. For additional info:
    http://www.ime.unicamp.br/~martinez/packmol/home.shtml

    The geometry volume is estimated by the using the padding parameter and the solute size.
    The number of solvent molecules is calculated by using the specified density and volume.
    Solvent molecules are specified as comma separated smiles strings. The molar fractions
    of each solvent molecule are specified in a similar fashion. By default if the solute is
    charged counter ions are added to neutralize it

    Temporary .pdb and .inp files are written in the current working directory
    and removed after Packmol has run successfully.

    Parameters:
    -----------
    solute: OEMol molecule
        The solute to solvate
    density: float
        The solution density in g/ml
    padding_distance: float
        The largest dimension of the solute (along the x, y, or z axis) is determined (in A),
        and a cubic box of size (largest dimension)+2*padding is used
    distance_between_atoms: float
        The minimum distance between atoms in A
    solvents: python string
        A comma separated smiles string or keywords for the solvent molecules.
        Special water models can be selected by using the keywords:
        tip3p for TIP3P water model geometry
    molar_fractions: python string
        A comma separated molar fraction string of the solvent molecules
    geometry: python string
        The solvation geometry: 'box' or 'sphere'
    close_solvent: boolean
        If True solvent molecules will be placed very close to the solute
        (the Packmol option avoid_overlap is set to no)
    salt: python string
        A comma separated string of the dissociated salt in solution
    salt_concentration: float
        Salt concentration in millimolar
    neutralize_solute: boolean
        If True counter-ions will be added to the solution to neutralize the solute
    verbose: Bool
        If True verbose mode is enabled and the Packmol output is not silenced
    return_components: Bool
        If True the added solvent molecules are also returned as OEMol
    kargs: python dictionary
        Additional keyword arguments are accepted and ignored

    Return:
    -------
    oe_mol: OEMol
        The solvated system. If the selected geometry is a box a tag with
        name 'box_vectors' is attached the output molecule containing
        the system box vectors.
    oe_mol_components: OEMol
        If the return_components flag is True the added solvent molecules are
        returned as an additional OEMol

    Raises:
    -------
    IOError
        If the packmol executable is not found or the tip3p file cannot be read
    ValueError
        If the solvent/molar fraction strings are inconsistent, the geometry is
        not supported, a smiles string cannot be parsed or embedded, the solute
        has multiple conformers, the estimated number of solvent molecules is
        not positive or the final density does not match the requested one
    subprocess.CalledProcessError
        If Packmol exits with a non zero status
    """

    def BoundingBox(molecule):
        """
        This function calculates the Bounding Box of the passed
        molecule

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box is returned as numpy array:
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        return np.array([np_coords.min(axis=0), np_coords.max(axis=0)])

    if shutil.which("packmol") is None:
        raise IOError("Packmol executable not found")

    # Extract solvent smiles strings and mole fractions
    solvents = [sm.strip() for sm in solvents.split(',')]
    fractions = [float(mf) for mf in molar_fractions.split(',')]

    # If the smiles string and mole fractions lists have different lengths raise an error
    if len(solvents) != len(fractions):
        raise ValueError(
            "Selected solvent number and selected molar fraction number mismatch: {} vs {}"
            .format(len(solvents), len(fractions)))

    # Remove smiles string with 0.0 mole fraction
    solvent_smiles = [sm for sm, mf in zip(solvents, fractions) if mf]
    mol_fractions = [mf for mf in fractions if mf]

    # Mole fractions are non-negative numbers
    if any(mf < 0.0 for mf in mol_fractions):
        raise ValueError("Error: Mole fractions are non-negative real numbers")

    # Mole fractions must sum up to 1.0
    if abs(sum(mol_fractions) - 1.0) > 0.001:
        oechem.OEThrow.Error("Error: Mole fractions do not sum up to 1.0")

    if geometry not in ['box', 'sphere']:
        raise ValueError(
            "Error geometry: the supported geometries are box and sphere not {}"
            .format(geometry))

    # Set Units
    density = density * unit.grams / unit.milliliter
    padding_distance = padding_distance * unit.angstrom
    salt_concentration = salt_concentration * unit.millimolar

    # Calculate the Solute Bounding Box
    BB_solute = BoundingBox(solute)

    # Estimate of the box cube length
    box_edge = 2.0 * padding_distance + np.max(BB_solute[1] - BB_solute[0]) * unit.angstrom

    if geometry == 'box':
        # Box Volume
        Volume = box_edge**3
    if geometry == 'sphere':
        Volume = (4.0 / 3.0) * 3.14159265 * (0.5 * box_edge)**3

    # Omega engine is used to generate conformations
    omegaOpts = oeomega.OEOmegaOptions()
    omegaOpts.SetMaxConfs(1)
    omegaOpts.SetStrictStereo(False)
    omega = oeomega.OEOmega(omegaOpts)

    # Create a string code to identify the solute residues. The code ID used is based
    # on the residue number id, the residue name and the chain id:
    # id+resname+chainID
    hv_solute = oechem.OEHierView(
        solute,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)
    solute_resid_list = []
    for chain in hv_solute.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                solute_resid_list.append(
                    str(oe_res.GetResidueNumber()) + oe_res.GetName() + chain.GetChainID())

    # Solvent component list_names: residue name -> template molecule
    solvent_resid_dic_names = dict()

    # Neutralize solute
    ion_sum_wgt_n_ions = 0.0 * unit.grams / unit.mole
    if neutralize_solute:
        # Container for the counter-ions
        oe_ions = []
        # Container for the ion smiles strings
        ions_smiles = []
        solute_formal_charge = 0
        for at in solute.GetAtoms():
            solute_formal_charge += at.GetFormalCharge()
        if solute_formal_charge > 0:
            ions_smiles.append("[Cl-]")
        elif solute_formal_charge < 0:
            ions_smiles.append("[Na+]")

        # Total number of counter-ions to neutralize the solute
        n_ions = abs(solute_formal_charge)

        # Ions
        if n_ions >= 1:
            for sm in ions_smiles:
                mol = oechem.OEMol()
                if not oechem.OESmilesToMol(mol, sm):
                    raise ValueError(
                        "Error counter ions: SMILES string parsing fails for the string: {}"
                        .format(sm))

                # Generate conformer
                if not omega(mol):
                    raise ValueError(
                        "Error counter ions: Conformer generation fails for the molecule with "
                        "smiles string: {}".format(sm))

                oe_ions.append(mol)

                if sm == '[Na+]':
                    solvent_resid_dic_names[' NA'] = mol
                else:
                    solvent_resid_dic_names[' CL'] = mol

            ion_sum_wgt = 0.0 * unit.grams / unit.mole
            for ion in oe_ions:
                # Molecular weight
                ion_sum_wgt += oechem.OECalculateMolecularWeight(ion) * unit.grams / unit.mole

            ion_sum_wgt_n_ions = ion_sum_wgt * n_ions

            # Create ions .pdb files
            # NOTE: tempfile.mktemp is only used to get a unique file name, the
            # files are written in the current working directory
            ions_smiles_pdbs = []
            for ion_sm, ion_mol in zip(ions_smiles, oe_ions):
                pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
                pdb_name = ion_sm + '_' + pdb_name
                ions_smiles_pdbs.append(pdb_name)
                ofs = oechem.oemolostream(pdb_name)
                oechem.OEWriteConstMolecule(ofs, ion_mol)
                # The file must be complete on disk before Packmol reads it
                ofs.close()

    # Add salts to the solution

    # Solvent smiles string parsing
    char_set = string.ascii_uppercase
    salt_sum_wgt_n_salt = 0.0 * unit.grams / unit.mole
    if salt_concentration > 0.0 * unit.millimolar:

        salt_smiles = [sm.strip() for sm in salt.split(',')]

        # Container list of oemol salt molecules generated by using smiles strings
        oe_salt = []

        for sm in salt_smiles:
            mol_salt = oechem.OEMol()
            if not oechem.OESmilesToMol(mol_salt, sm):
                raise ValueError(
                    "Error salt: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_salt):
                raise ValueError(
                    "Error salt: Conformer generation fails for the "
                    "molecule with smiles string: {}".format(sm))

            # Unique 3 code letter are set as solvent residue names
            solv_id = ''.join(random.sample(char_set * 3, 3))

            # Try to recognize the residue name
            oechem.OEPerceiveResidues(mol_salt)

            for atmol in mol_salt.GetAtoms():
                res = oechem.OEAtomGetResidue(atmol)
                if res.GetName() == 'UNL':
                    # Unknown residue: every atom gets the random name
                    res.SetName(solv_id)
                    oechem.OEAtomSetResidue(atmol, res)
                    if solv_id not in solvent_resid_dic_names:
                        solvent_resid_dic_names[solv_id] = mol_salt
                else:
                    # Recognized residue: register it once and stop
                    if res.GetName() not in solvent_resid_dic_names:
                        solvent_resid_dic_names[res.GetName()] = mol_salt
                    break

            oe_salt.append(mol_salt)

        n_salt = int(
            round(unit.AVOGADRO_CONSTANT_NA * salt_concentration *
                  Volume.in_units_of(unit.liter)))

        salt_sum_wgt = 0.0 * unit.grams / unit.mole
        # The loop variable must not be named salt: it would shadow the
        # salt function parameter
        for salt_mol in oe_salt:
            # Molecular weight
            salt_sum_wgt += oechem.OECalculateMolecularWeight(salt_mol) * unit.grams / unit.mole

        salt_sum_wgt_n_salt = salt_sum_wgt * n_salt

        # Create salt .pdb files
        if n_salt >= 1:
            salt_pdbs = []
            for salt_mol in oe_salt:
                pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
                salt_pdbs.append(pdb_name)
                ofs = oechem.oemolostream(pdb_name)
                oechem.OEWriteConstMolecule(ofs, salt_mol)
                ofs.close()

    # Container list of oemol solvent molecules generated by using smiles strings
    oe_solvents = []

    for sm in solvent_smiles:

        if sm == 'tip3p':
            tip3p_fn = os.path.join(PACKAGE_DIR, 'oeommtools', 'data', 'tip3p.pdb')
            ifs = oechem.oemolistream(tip3p_fn)
            mol_sol = oechem.OEMol()

            if not oechem.OEReadMolecule(ifs, mol_sol):
                raise IOError(
                    "It was not possible to read the tip3p molecule file")
        else:
            mol_sol = oechem.OEMol()

            if not oechem.OESmilesToMol(mol_sol, sm):
                raise ValueError(
                    "Error solvent: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_sol):
                raise ValueError(
                    "Error solvent: Conformer generation fails for "
                    "the molecule with smiles string: {}".format(sm))

        # Unique 3 code letter are set as solvent residue names
        solv_id = ''.join(random.sample(char_set * 3, 3))

        # Try to recognize the residue name
        oechem.OEPerceiveResidues(mol_sol)

        for atmol in mol_sol.GetAtoms():
            res = oechem.OEAtomGetResidue(atmol)
            if res.GetName() == 'UNL':
                res.SetName(solv_id)
                oechem.OEAtomSetResidue(atmol, res)
                if solv_id not in solvent_resid_dic_names:
                    solvent_resid_dic_names[solv_id] = mol_sol
            else:
                if res.GetName() not in solvent_resid_dic_names:
                    solvent_resid_dic_names[res.GetName()] = mol_sol
                break

        oe_solvents.append(mol_sol)

    # Sum of the solvent molecular weights
    solvent_sum_wgt_frac = 0.0 * unit.grams / unit.mole

    for mol_sol, mol_frac in zip(oe_solvents, mol_fractions):
        # Molecular weight
        wgt = oechem.OECalculateMolecularWeight(mol_sol) * unit.grams / unit.mole
        solvent_sum_wgt_frac += wgt * mol_frac

    # Solute molecular weight
    solute_wgt = oechem.OECalculateMolecularWeight(solute) * unit.gram / unit.mole

    # Estimate of the number of each molecular species present in the solution accordingly
    # to their molar fraction fi:
    #
    # ni = fi*(density*volume*NA - wgt_solute - sum_k(wgt_salt_k*nk) - wgt_ion*n_ion)/sum_j(wgt_nj * fj)
    #
    # where ni is the number of molecule of specie i, density the mixture density, volume the
    # mixture volume, wgt_solute the molecular weight of the solute, wgt_salt_k the molecular
    # weight of the salt component k, nk the number of molecule of salt component k, wgt_ion
    # the counter ion molecular weight, n_ions the number of counter ions and wgt_nj the molecular
    # weight of the molecule specie j with molar fraction fj

    div = (unit.AVOGADRO_CONSTANT_NA * density * Volume -
           (solute_wgt + salt_sum_wgt_n_salt + ion_sum_wgt_n_ions)) / solvent_sum_wgt_frac

    # Solvent number of monomers
    n_monomers = [int(round(mf * div)) for mf in mol_fractions]

    if not all(nm > 0 for nm in n_monomers):
        raise ValueError(
            "Error negative number of solvent components: the density could be too low"
        )

    # Packmol Configuration file setting
    if close_solvent:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap no"""
    else:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap yes"""

    # Templates strings
    solute_template = """\n\n# Solute\nstructure {}\nnumber 1\nfixed 0. 0. 0. 0. 0. 0.\nresnumbers 1\nend structure"""

    if geometry == 'box':
        solvent_template = """\nstructure {}\nnumber {}\ninside box {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f}\
        \nchain !\nresnumbers 3\nend structure"""
    if geometry == 'sphere':
        solvent_template = """\nstructure {}\nnumber {}\ninside sphere {:0.3f} {:0.3f} {:0.3f} {:0.3f}\
        \nchain !\nresnumbers 3\nend structure"""

    # Create solvents .pdb files
    solvent_pdbs = []
    for mol_sol in oe_solvents:
        pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
        solvent_pdbs.append(pdb_name)
        ofs = oechem.oemolostream(pdb_name)
        oechem.OEWriteConstMolecule(ofs, mol_sol)
        ofs.close()

    # The conformer check is done before opening the solute file so that
    # an empty file is not left behind on failure
    if solute.GetMaxConfIdx() > 1:
        raise ValueError("Solutes with multiple conformers are not supported")

    solute_pdb = 'solute' + '_' + os.path.basename(tempfile.mktemp(suffix='.pdb'))
    ofs = oechem.oemolostream(solute_pdb)
    oechem.OEWriteConstMolecule(ofs, solute)
    ofs.close()

    # Write Packmol header section
    mixture_pdb = 'mixture' + '_' + os.path.basename(tempfile.mktemp(suffix='.pdb'))
    body = header_template.format(distance_between_atoms, mixture_pdb)
    # Write Packmol configuration file solute section
    body += solute_template.format(solute_pdb)

    # The solute is centered inside the box
    xc = (BB_solute[0][0] + BB_solute[1][0]) / 2.
    yc = (BB_solute[0][1] + BB_solute[1][1]) / 2.
    zc = (BB_solute[0][2] + BB_solute[1][2]) / 2.

    # Correct for periodic box conditions to avoid
    # steric clashes at the box edges
    pbc_correction = 1.0 * unit.angstrom

    half_edge = ((box_edge - pbc_correction) / 2.) / unit.angstrom
    xmin = xc - half_edge
    xmax = xc + half_edge
    ymin = yc - half_edge
    ymax = yc + half_edge
    zmin = zc - half_edge
    zmax = zc + half_edge

    # Geometry arguments shared by all the Packmol structure sections
    if geometry == 'box':
        region = (xmin, ymin, zmin, xmax, ymax, zmax)
    if geometry == 'sphere':
        region = (xc, yc, zc, 0.5 * box_edge / unit.angstrom)

    # Packmol setting for the solvent section
    body += '\n\n# Solvent'
    for pdb_name, n_mol in zip(solvent_pdbs, n_monomers):
        body += solvent_template.format(pdb_name, n_mol, *region)

    # Packmol setting for the salt section
    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        body += '\n\n# Salt'
        for pdb_name in salt_pdbs:
            body += solvent_template.format(pdb_name, n_salt, *region)

    # Packmol setting for the ions section
    if neutralize_solute and n_ions >= 1:
        body += '\n\n# Counter Ions'
        for pdb_name in ions_smiles_pdbs:
            body += solvent_template.format(pdb_name, n_ions, *region)

    # Packmol configuration file
    packmol_filename = os.path.basename(tempfile.mktemp(suffix='.inp'))

    with open(packmol_filename, 'w') as file_handle:
        file_handle.write(body)

    # Call Packmol. subprocess.DEVNULL silences the output without
    # leaving an open file handle behind
    with open(packmol_filename, 'r') as file_handle:
        if verbose:
            subprocess.check_call(['packmol'], stdin=file_handle)
        else:
            subprocess.check_call(['packmol'],
                                  stdin=file_handle,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)

    # Read in the Packmol solvated system
    solvated = oechem.OEMol()

    if os.path.exists(mixture_pdb + '_FORCED'):
        os.rename(mixture_pdb + '_FORCED', mixture_pdb)
        print("Warning: Packing solution is not optimal")

    ifs = oechem.oemolistream(mixture_pdb)
    oechem.OEReadMolecule(ifs, solvated)

    # To avoid to change the user oemol starting solute by reading in
    # the generated mixture pdb file and loosing molecule info, the
    # solvent molecules are extracted from the mixture system and
    # added back to the starting solute

    # Extract from the solution system the solvent molecules
    # by checking the previous solute generated ID: id+resname+chainID
    hv_solvated = oechem.OEHierView(
        solvated,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)

    # This molecule will hold the solvent molecules generated directly from
    # the omega conformers. This is useful to avoid problems related to read in
    # the solvent molecules from pdb files and triggering unwanted perceiving actions
    new_components = oechem.OEMol()

    bv = oechem.OEBitVector(solvated.GetMaxAtomIdx())
    for chain in hv_solvated.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                res_code = (str(oe_res.GetResidueNumber()) + oe_res.GetName()
                            + chain.GetChainID())
                if res_code not in solute_resid_list:
                    oechem.OEAddMols(new_components,
                                     solvent_resid_dic_names[oe_res.GetName()])
                    for at in hres.GetAtoms():
                        bv.SetBitOn(at.GetIdx())

    pred = oechem.OEAtomIdxSelected(bv)
    components = oechem.OEMol()
    oechem.OESubsetMol(components, solvated, pred)

    # The template molecules get the coordinates packed by Packmol
    new_components.SetCoords(components.GetCoords())

    # This is necessary otherwise just one big residue is created
    oechem.OEPerceiveResidues(new_components)

    # Add the solvent molecules to the solute copy
    solvated_system = solute.CreateCopy()
    oechem.OEAddMols(solvated_system, new_components)

    # Set Title
    solvated_system.SetTitle(solute.GetTitle())

    # Set ions resname to Na+ and Cl-. The renamed residue has to be set
    # back on the atom being visited
    for at in solvated_system.GetAtoms():
        res = oechem.OEAtomGetResidue(at)
        if res.GetName() == ' NA':
            res.SetName("Na+")
            oechem.OEAtomSetResidue(at, res)
        elif res.GetName() == ' CL':
            res.SetName("Cl-")
            oechem.OEAtomSetResidue(at, res)

    # Cleaning
    to_delete = solvent_pdbs + [packmol_filename, solute_pdb, mixture_pdb]

    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        to_delete += salt_pdbs
    if neutralize_solute and n_ions >= 1:
        to_delete += ions_smiles_pdbs

    for fn in to_delete:
        try:
            os.remove(fn)
        except OSError:
            # Best effort: a file that cannot be removed is not fatal
            pass

    # Calculate the solution total density
    total_wgt = oechem.OECalculateMolecularWeight(solvated_system) * unit.gram / unit.mole
    density_mix = (1 / unit.AVOGADRO_CONSTANT_NA) * total_wgt / Volume
    print("Computed Solution Density = {}".format(
        density_mix.in_units_of(unit.gram / unit.milliliter)))
    # Threshold checking
    ths = 0.1 * unit.gram / unit.milliliter
    if not abs(density - density_mix.in_units_of(unit.gram / unit.milliliter)) < ths:
        raise ValueError(
            "Error: the computed density for the solute {} does not match the selected density {} vs {}"
            .format(solute.GetTitle(), density_mix, density))

    if geometry == 'box':
        # Define the box vector and attached it as SD tag to the solvated system
        # with ID tag: 'box_vectors'
        box_vectors = (Vec3(box_edge / unit.angstrom, 0.0, 0.0),
                       Vec3(0.0, box_edge / unit.angstrom, 0.0),
                       Vec3(0.0, 0.0, box_edge / unit.angstrom)) * unit.angstrom

        box_vectors = data_utils.encodePyObj(box_vectors)
        solvated_system.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    if return_components:
        new_components.SetTitle(solute.GetTitle() + '_solvent_comp')
        return solvated_system, new_components
    else:
        return solvated_system
def applyffExcipients(excipients, opt):
    """
    This function applies the selected force field to the
    excipients

    The excipients recognized by the protein force field are parametrized
    with it. Each kind of unrecognized excipient is extracted once, charged
    with AM1BCC and parametrized with the force field selected by
    opt['other_forcefield']; the resulting structure is then replicated for
    every occurrence of that excipient. In the returned structure the
    unrecognized excipients come first, followed by the recognized ones.

    Parameters:
    -----------
    excipients: OEMol molecule
        The excipients molecules to parametrize
    opt: python dictionary
        The options used to parametrize the excipients. The keys read here
        are 'protein_forcefield', 'other_forcefield' and 'prefix_name'

    Return:
    -------
    excipient_structure: Parmed structure instance
        The parametrized excipient parmed structure
    """
    from itertools import groupby

    # OpenMM topology and positions from OEMol
    topology, positions = oeommutils.oemol_to_openmmTop(excipients)

    # Try to apply the selected FF on the excipients
    forcefield = app.ForceField(opt['protein_forcefield'])

    # List of the unrecognized excipients
    unmatched_res_list = forcefield.getUnmatchedResidues(topology)

    # Unique unrecognized excipient names
    templates = {res.name for res in unmatched_res_list}

    if not templates:
        # All the excipients are recognized by the selected FF
        omm_system = forcefield.createSystem(topology, rigidWater=False)
        excipients_structure = parmed.openmm.load_topology(topology, omm_system, xyz=positions)
        return excipients_structure

    # Some excipients are not recognized
    oechem.OEThrow.Info("The following excipients are not recognized "
                        "by the protein FF: {}"
                        "\nThey will be parametrized by using the FF: {}".format(templates, opt['other_forcefield']))

    # Create a bit vector mask used to split recognized from un-recognize excipients.
    # All the bits start on; the unrecognized atoms are switched off
    bv = oechem.OEBitVector(excipients.GetMaxAtomIdx())
    bv.NegateBits()

    # Dictionary containing the name and the parmed structures of the unrecognized excipients
    unrc_excipient_structures = {}

    # Dictionary used to skip already selected unrecognized excipients and count them
    unmatched_excp = {r_name: 0 for r_name in templates}

    # Ordered list of the unrecognized excipients
    unmatched_res_order = []

    hv = oechem.OEHierView(excipients)

    for chain in hv.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                r_name = hres.GetOEResidue().GetName()
                if r_name not in unmatched_excp:
                    continue

                unmatched_res_order.append(r_name)

                if unmatched_excp[r_name]:
                    # This excipient has already been parametrized: just
                    # mask its atoms and count it
                    for at in hres.GetAtoms():
                        bv.SetBitOff(at.GetIdx())
                    unmatched_excp[r_name] += 1
                    continue

                unmatched_excp[r_name] = 1

                # Create AtomBondSet to extract from the whole excipient system
                # the current selected FF unknown excipient
                bond_set = set()
                for at in hres.GetAtoms():
                    bv.SetBitOff(at.GetIdx())
                    for bond in at.GetBonds():
                        bond_set.add(bond)

                # A fresh iterator is requested here because the one used
                # by the loop above has already been walked
                atom_bond_set = oechem.OEAtomBondSet(hres.GetAtoms())
                for bond in bond_set:
                    atom_bond_set.AddBond(bond)

                # Create the unrecognized excipient OEMol
                unrc_excp = oechem.OEMol()
                if not oechem.OESubsetMol(unrc_excp, excipients, atom_bond_set):
                    oechem.OEThrow.Fatal("Is was not possible extract the residue: {}".format(r_name))

                # Charge the unrecognized excipient
                if not oequacpac.OEAssignCharges(unrc_excp,
                                                 oequacpac.OEAM1BCCCharges(symmetrize=True)):
                    oechem.OEThrow.Fatal("Is was not possible to "
                                         "charge the extract residue: {}".format(r_name))

                # If GAFF or GAFF2 is selected as FF check for tleap command
                # NOTE(review): checkTleap is accessed but not called; this
                # does something only if it is a property - confirm
                if opt['other_forcefield'] in ['GAFF', 'GAFF2']:
                    ff_utils.ParamLigStructure(oechem.OEMol(), opt['other_forcefield']).checkTleap

                if opt['other_forcefield'] == 'SMIRNOFF':
                    unrc_excp = oeommutils.sanitizeOEMolecule(unrc_excp)

                # Parametrize the unrecognized excipient by using the selected FF
                pmd = ff_utils.ParamLigStructure(unrc_excp, opt['other_forcefield'],
                                                 prefix_name=opt['prefix_name']+'_'+r_name)
                unrc_excp_struc = pmd.parameterize()
                unrc_excp_struc.residues[0].name = r_name
                unrc_excipient_structures[r_name] = unrc_excp_struc

    # Recognized FF excipients
    pred_rec = oechem.OEAtomIdxSelected(bv)
    rec_excp = oechem.OEMol()
    oechem.OESubsetMol(rec_excp, excipients, pred_rec)

    if rec_excp.NumAtoms() > 0:
        top_known, pos_known = oeommutils.oemol_to_openmmTop(rec_excp)
        ff_rec = app.ForceField(opt['protein_forcefield'])
        try:
            omm_system = ff_rec.createSystem(top_known, rigidWater=False)
            rec_struc = parmed.openmm.load_topology(top_known, omm_system, xyz=pos_known)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed
            oechem.OEThrow.Fatal("Error in the recognised excipient parametrization")

    # Unrecognized FF excipients
    bv.NegateBits()
    pred_unrc = oechem.OEAtomIdxSelected(bv)
    unrc_excp = oechem.OEMol()
    oechem.OESubsetMol(unrc_excp, excipients, pred_unrc)

    # Unrecognized FF excipients coordinates
    oe_coord_dic = unrc_excp.GetCoords()
    unrc_coords = np.ndarray(shape=(unrc_excp.NumAtoms(), 3))
    for at_idx in oe_coord_dic:
        unrc_coords[at_idx] = oe_coord_dic[at_idx]

    # It is important the order used to assemble the structures. In order to
    # avoid mismatch between the coordinates and the structures, it is convenient
    # to use the unrecognized residue order: consecutive occurrences of the
    # same residue name are collapsed in (name, count) pairs
    unmatched_res_order_count = [(res_name, sum(1 for _ in run))
                                 for res_name, run in groupby(unmatched_res_order)]

    # Merge all the unrecognized Parmed structure
    unrc_struc = parmed.Structure()
    for res_name, nums in unmatched_res_order_count:
        unrc_struc = unrc_struc + nums*unrc_excipient_structures[res_name]

    # Set the unrecognized coordinates
    unrc_struc.coordinates = unrc_coords

    # Set the parmed excipient structure merging
    # the unrecognized and recognized parmed
    # structures together
    if rec_excp.NumAtoms() > 0:
        excipients_structure = unrc_struc + rec_struc
    else:
        excipients_structure = unrc_struc

    return excipients_structure
def extract_aligned_prot_lig_wat_traj(md_components, flask, trj_fn, opt, nmax=30, water_cutoff=15.0):
    """
    Extracts the aligned protein, ligand and water trajectories from a MD
    trajectory of a larger system that includes other components (eg water).

    The trajectory is first imaged around the protein-ligand complex and then
    superposed on the set-up flask coordinates, using as fitting atoms the
    protein atoms selected with "backbone and element C" that are within 5 A
    from the ligand in the first trajectory frame. Once the alignment is done,
    the protein and ligand trajectories are each placed into a separate OEMol,
    one conformer per trajectory frame.

    For each frame the water trajectory contains nmax water molecules. The
    candidates are the waters whose oxygen is within water_cutoff from the
    binding site fitting atoms or from the ligand atoms; they are ranked by
    the sum of the minimum oxygen-ligand distance and the minimum
    oxygen-binding site distance and the first nmax are kept.

    NOTE(review): the original description states that the alignment is done
    on the alpha carbons (atom name CA), but the selection string used is
    "backbone and element C", which presumably also matches the backbone
    carbonyl carbons -- confirm which one is intended.

    Inputs:
        md_components: MDComponents object
            The md components carrying the setup starting flask. Its
            create_flask, get_protein and get_ligand attributes are read.
        flask: OEMol
            The system flask. Its coordinates are overwritten in place with
            the coordinates of each aligned trajectory frame.
        trj_fn: String
            The filename of the hdf5-format MD trajectory (.h5) or Gromacs
            .trr file format. For a .trr file a .pdb file located in the same
            directory is used as topology and the first frame is discarded.
        opt: Dictionary
            The options dictionary. opt['Logger'] is used to emit a warning
            if nmax is larger than the value returned by nmax_waters.
        nmax: Integer
            max number of waters to select
        water_cutoff: Float
            The cutoff distance between the PL binding site and the waters in
            angstroms

    Outputs:
        multi_conf_protein: A multi conformer OEMol for the protein, one conformer per frame.
        multi_conf_ligand: A multi conformer OEMol for the ligand, one conformer per frame.
        multi_conf_water: A multi conformer OEMol for the waters, one conformer per frame.

    Raises:
        ValueError: if the trajectory file extension is not recognized, if no
            .pdb topology file is found for a .trr trajectory, if the loaded
            trajectory has no frames, if fewer than nmax waters are found in
            a frame, or if the number of the selected water atoms in the
            first frame is not a multiple of 3.
    """

    # Set-up flask, atom index map and the protein and ligand molecules
    set_up_flask, map_dic = md_components.create_flask
    protein = md_components.get_protein
    ligand = md_components.get_ligand

    check_nmax = nmax_waters(protein, ligand, water_cutoff)

    if check_nmax < nmax:
        opt['Logger'].warn(
            "The selected number of max waters cannot fit around the protein binding site: {} vs {}"
            .format(nmax, check_nmax))

    traj_ext = os.path.splitext(trj_fn)[1]
    traj_dir = os.path.dirname(trj_fn)

    if traj_ext == '.h5':
        trj = md.load_hdf5(trj_fn)
    elif traj_ext == '.trr':
        # The .trr format carries no topology: use a pdb file saved in the
        # same directory of the trajectory
        pdb_files = glob.glob(os.path.join(traj_dir, '*.pdb'))
        if not pdb_files:
            raise ValueError(
                "No .pdb topology file found in the directory of the trajectory {}".format(trj_fn))
        trj = md.load_trr(trj_fn, top=pdb_files[0])
        # The first frame of the .trr trajectory is discarded
        trj = trj[1:]
    else:
        raise ValueError(
            "Trajectory file format {} not recognized in the trajectory {}".format(traj_ext, trj_fn))

    if trj.n_frames == 0:
        raise ValueError("The trajectory {} does not contain any frame".format(trj_fn))

    # System topology
    top_trj = trj.topology

    # Ligand and protein indexes. It is safer to use the indexes coming from
    # the OE toolkits than mdtraj which is missing the protein caps
    lig_idx = map_dic['ligand']
    prot_idx = map_dic['protein']

    # Water oxygen indexes
    water_O_idx = top_trj.select("water and element O")

    # Protein backbone carbon indexes used to define the binding site
    prot_ca_idx = top_trj.select("backbone and element C")

    # Cutoff for the selection of the binding site atoms in A
    cutoff_bs = 5.0

    # Binding site indexes: mdtraj works in nm so the cutoff is converted
    ca_bs_idx = md.compute_neighbors(trj[0], cutoff_bs / 10.0, lig_idx,
                                     haystack_indices=prot_ca_idx, periodic=True)[0]

    # Binding site and ligand indexes
    ca_bs_lig_idx = np.concatenate((ca_bs_idx, lig_idx))

    # Image the protein-ligand trajectory so the complex does not jump across box boundaries
    protlig = trj[0].atom_slice(np.concatenate((prot_idx, lig_idx)))
    protligAtoms = list(protlig.topology.atoms)

    # The messages written to stderr during the imaging are discarded
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stderr(devnull):
            trjImaged = trj.image_molecules(inplace=False, anchor_molecules=[protligAtoms], make_whole=True)

    n_lig = len(lig_idx)
    n_ca_bs = len(ca_bs_idx)

    # For each frame, the list of the nmax (water oxygen index, metric) pairs
    water_max_frames = []

    for frame_idx, frame in enumerate(trjImaged):

        # Water oxygen binding site indexes. The returned list has one entry
        # only because a single frame is passed
        water_O_bs_idx = md.compute_neighbors(frame, water_cutoff / 10.0, ca_bs_lig_idx,
                                              haystack_indices=water_O_idx, periodic=True)
        n_wat = len(water_O_bs_idx[0])

        # Pair combination water indexes times ligand indexes
        wat_lig_pairs = np.array(np.meshgrid(water_O_bs_idx, lig_idx)).T.reshape(-1, 2)

        # Distances between the waters and the ligand in nm
        wat_lig_distances = md.compute_distances(frame, wat_lig_pairs, periodic=True, opt=True)

        # Min distances in nm between the oxygen waters and the ligand
        min_wat_O_lig_distances = np.min(np.reshape(wat_lig_distances, (n_wat, n_lig)), axis=1)

        # Pair combination water indexes times protein binding site indexes
        wat_ca_bs_pairs = np.array(np.meshgrid(water_O_bs_idx, ca_bs_idx)).T.reshape(-1, 2)

        # Distances between the waters and the protein binding site in nm
        wat_ca_bs_distances = md.compute_distances(frame, wat_ca_bs_pairs, periodic=True, opt=True)

        # Min distances in nm between the oxygen waters and the protein binding site
        min_wat_O_ca_bs_distances = np.min(np.reshape(wat_ca_bs_distances, (n_wat, n_ca_bs)), axis=1)

        # The waters are ranked by the sum of the two minimum distances
        metrics = min_wat_O_lig_distances + min_wat_O_ca_bs_distances

        metric_distances = {int(wat_idx): m for wat_idx, m in zip(water_O_bs_idx[0], metrics)}

        water_list_sorted_max = sorted(metric_distances.items(), key=lambda x: x[1])[:nmax]

        if len(water_list_sorted_max) != nmax:
            raise ValueError(
                "The ordered water list has the wrong size {} vs expected {} for the frame {}"
                .format(len(water_list_sorted_max), nmax, frame_idx))

        water_max_frames.append(water_list_sorted_max)

    # Put the reference mol xyz into the 1-frame topologyTraj to use as a reference in the fit
    setup_mol_array_coords = oechem.OEDoubleArray(3 * set_up_flask.GetMaxAtomIdx())
    set_up_flask.GetCoords(setup_mol_array_coords)
    setup_mol_xyzArr = np.array(setup_mol_array_coords).reshape(-1, 3)

    trj_reference = trjImaged[0]
    # convert from angstroms to nanometers
    trj_reference.xyz[0] = setup_mol_xyzArr / 10.0

    # Fitting
    trjImaged.superpose(trj_reference, 0, ca_bs_idx)

    # Delete Original Trajectory to save memory
    del trj

    # Molecule copies
    ligand_reference = oechem.OEMol(ligand)
    protein_reference = oechem.OEMol(protein)

    # Loop invariants: the size of the flask atom mask and the index arrays
    # used to slice the ligand and protein coordinates from each frame
    flask_max_idx = flask.GetMaxAtomIdx()
    lig_idx_arr = np.asarray(lig_idx, dtype=int)
    prot_idx_arr = np.asarray(prot_idx, dtype=int)

    # Create the multi conformer protein, ligand and water molecules
    for frame_idx, frame in enumerate(trjImaged.xyz):

        # Extract coordinates in A
        xyz = frame * 10

        # Set flask Coordinates as the current frame for the water extraction
        flask.SetCoords(xyz.flatten())

        # TODO The following solution to extract the waters do not
        #  keep the water order

        # Mark the close water atoms and extract them. The mask is indexed by
        # the flask atom indexes so it is sized on the flask
        bv = oechem.OEBitVector(flask_max_idx)

        for wat_idx, _ in water_max_frames[frame_idx]:

            ow = flask.GetAtom(oechem.OEHasAtomIdx(wat_idx))

            # Select the whole water molecule
            for atw in oechem.OEGetResidueAtoms(ow):
                bv.SetBitOn(atw.GetIdx())

        pred_vec = oechem.OEAtomIdxSelected(bv)
        water_nmax_reference = oechem.OEMol()
        oechem.OESubsetMol(water_nmax_reference, flask, pred_vec)

        # ligand and protein conf coordinates in A
        lig_confxyz = oechem.OEFloatArray(xyz[lig_idx_arr].ravel())
        prot_confxyz = oechem.OEFloatArray(xyz[prot_idx_arr].ravel())

        # Initialize the protein, ligand and water molecule topologies
        if frame_idx == 0:

            multi_conf_water = oechem.OEMol(water_nmax_reference)

            if multi_conf_water.NumAtoms() % 3 != 0:
                raise ValueError("Number of Water atoms is not multiple of 3")

            # Clean ResNumber and Chain on the multi conf water molecule
            multi_conf_water.SetTitle("Water_" + str(nmax))

            # NOTE(review): the edited residue is never stored back with
            # oechem.OEAtomSetResidue; confirm that the changes made on the
            # object returned by OEAtomGetResidue are kept on the atom
            res_num = 0
            for i, at in enumerate(multi_conf_water.GetAtoms()):

                res = oechem.OEAtomGetResidue(at)
                res.SetSerialNumber(i)
                res.SetName("HOH")
                res.SetChainID("Z")
                # A new residue number is started every three atoms
                if i % 3 == 0:
                    res_num += 1
                res.SetResidueNumber(res_num)

            ligand_reference.SetCoords(lig_confxyz)
            protein_reference.SetCoords(prot_confxyz)
            multi_conf_ligand = oechem.OEMol(ligand_reference)
            multi_conf_protein = oechem.OEMol(protein_reference)

        # Attach the conformers on the multi conformer protein, ligand and water molecules
        else:
            water_confxyz = oechem.OEFloatArray(water_nmax_reference.NumAtoms() * 3)
            water_nmax_reference.GetCoords(water_confxyz)

            multi_conf_water.NewConf(water_confxyz)
            multi_conf_ligand.NewConf(lig_confxyz)
            multi_conf_protein.NewConf(prot_confxyz)

    return multi_conf_protein, multi_conf_ligand, multi_conf_water