def test_hyd_contacts():
    """Hydrophobic Contacts test"""
    # Reference number of hydrophobic contacts, one entry per molecule in
    # `mols`, in the same order.
    expected_counts = [
        14, 10, 7, 14, 10, 13, 17, 14, 17, 12,
        12, 10, 10, 11, 9, 8, 8, 4, 9, 16,
        15, 6, 9, 8, 5, 5, 8, 11, 7, 10,
        7, 13, 4, 13, 9, 9, 9, 4, 6, 16,
        10, 13, 10, 9, 8, 9, 13, 15, 13, 9,
        11, 9, 7, 10, 5, 3, 5, 7, 7, 10,
        11, 7, 10, 20, 9, 6, 6, 3, 7, 7,
        4, 7, 6, 2, 5, 6, 14, 9, 4, 6,
        11, 10, 9, 6, 10, 8, 6, 5, 6, 11,
        8, 16, 9, 9, 11, 6, 8, 5, 8, 15,
    ]

    observed_counts = []
    for mol in mols:
        # The first returned item is the receptor-side half of the contacts.
        receptor_side = hydrophobic_contacts(rec, mol)[0]
        observed_counts.append(len(receptor_side))

    assert_array_equal(observed_counts, expected_counts)
def test_hyd_contacts():
    """Hydrophobic Contacts test"""
    # One contact count per docked molecule; the first item returned by
    # hydrophobic_contacts holds one entry per detected contact.
    counts_per_mol = list(
        map(lambda mol: len(hydrophobic_contacts(rec, mol)[0]), mols)
    )
    reference = [
        14, 10, 7, 14, 10, 13, 17, 14, 17, 12, 12, 10, 10, 11, 9, 8, 8, 4, 9,
        16, 15, 6, 9, 8, 5, 5, 8, 11, 7, 10, 7, 13, 4, 13, 9, 9, 9, 4, 6, 16,
        10, 13, 10, 9, 8, 9, 13, 15, 13, 9, 11, 9, 7, 10, 5, 3, 5, 7, 7, 10,
        11, 7, 10, 20, 9, 6, 6, 3, 7, 7, 4, 7, 6, 2, 5, 6, 14, 9, 4, 6, 11,
        10, 9, 6, 10, 8, 6, 5, 6, 11, 8, 16, 9, 9, 11, 6, 8, 5, 8, 15,
    ]
    assert_array_equal(counts_per_mol, reference)
def hydrophobics_identifier(protein, ligand):
    """List the atom pairs involved in protein-ligand hydrophobic contacts.

    input: protein obj and ligand obj from oddt

    return: a list with one tuple per contact, laid out as
    (residue's number, residue's name, residue's atomtype,
    ligand's atomtype, ligand's atom number)
    """
    protein_atoms, ligand_atoms = hydrophobic_contacts(protein, ligand)
    contact_count = len(protein_atoms['resnum'])
    # Entry `idx` of both arrays is read as the two sides of one contact.
    return [
        (
            protein_atoms['resnum'][idx],
            protein_atoms['resname'][idx],
            protein_atoms['atomtype'][idx],
            ligand_atoms['atomtype'][idx],
            ligand_atoms['id'][idx],
        )
        for idx in range(contact_count)
    ]
def build(self, ligands, protein=None):
    """Descriptor building method

    Parameters
    ----------
    ligands: array-like
        An array of generator of oddt.toolkit.Molecule objects for which
        the descriptor is computed

    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.
        If none, then the default protein (from constructor) is used.
        Otherwise, protein becomes new global and default protein.

    Returns
    -------
    descs: numpy array, shape=[n_samples, 351]
        An array of binana descriptors, aligned with input ligands.
        If `ligands` yields no molecules, an empty float array of shape
        (0, 0) is returned.
    """
    if protein:
        self.set_protein(protein)
    else:
        protein = self.protein
    protein_dict = protein.atom_dict

    # One float vector per ligand; stacked once after the loop instead of
    # re-copying the whole matrix for every ligand.
    rows = []
    for mol in ligands:
        mol_dict = mol.atom_dict
        vec = tuple()

        # Vina
        # TODO: Asynchronous output from vina, push command to score and
        #       retrieve at the end?
        # TODO: Check if ligand has vina scores
        vec += tuple(self.vina.build(mol).flatten())

        # Close Contacts (<4A)
        vec += tuple(self.cc_4.build(mol).flatten())

        # Electrostatics (<4A)
        ele_rec_types, ele_lig_types = zip(*self.ele_types)
        ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                      'atom_types_ad4')
        ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                      'atom_types_ad4')
        ele = tuple()
        for r_t, m_t in self.ele_types:
            mol_ele_dict, rec_ele_dict = close_contacts(
                ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
            if len(mol_ele_dict) and len(rec_ele_dict):
                # NOTE(review): np.sqrt is applied element-wise before the
                # sum over the last axis, so the denominator is the sum of
                # absolute coordinate differences, not the Euclidean
                # distance. Left as-is so existing descriptor values do not
                # shift -- confirm whether this is intended.
                ele += (mol_ele_dict['charge'] *
                        rec_ele_dict['charge'] /
                        np.sqrt((mol_ele_dict['coords'] -
                                 rec_ele_dict['coords'])**2).sum(axis=-1) *
                        138.94238460104697e4).sum(),  # convert to J/mol
            else:
                ele += 0,
        vec += tuple(np.nan_to_num(ele))

        # Ligand Atom Types
        atoms = atoms_by_type(mol_dict, self.ligand_atom_types,
                              'atom_types_ad4')
        vec += tuple([len(atoms[t]) for t in self.ligand_atom_types])

        # Close Contacts (<2.5A)
        vec += tuple(self.cc_25.build(mol).flatten())

        # H-Bonds (<4A)
        hbond_mol, hbond_rec, strict = hbonds(mol, protein, 4)
        # Retain only strict hbonds
        hbond_mol = hbond_mol[strict]
        hbond_rec = hbond_rec[strict]
        backbone = hbond_rec['isbackbone']
        alpha = hbond_rec['isalpha']
        beta = hbond_rec['isbeta']
        other = ~alpha & ~beta
        donor_mol = hbond_mol['isdonor']
        donor_rec = hbond_rec['isdonor']
        hbond_vec = ((donor_mol & backbone & alpha).sum(),
                     (donor_mol & backbone & beta).sum(),
                     (donor_mol & backbone & other).sum(),
                     (donor_mol & ~backbone & alpha).sum(),
                     (donor_mol & ~backbone & beta).sum(),
                     (donor_mol & ~backbone & other).sum(),
                     (donor_rec & backbone & alpha).sum(),
                     (donor_rec & backbone & beta).sum(),
                     (donor_rec & backbone & other).sum(),
                     (donor_rec & ~backbone & alpha).sum(),
                     (donor_rec & ~backbone & beta).sum(),
                     (donor_rec & ~backbone & other).sum())
        vec += tuple(hbond_vec)

        # Hydrophobic contacts (<4A)
        hydrophobic = hydrophobic_contacts(mol, protein, 4)[1]
        backbone = hydrophobic['isbackbone']
        alpha = hydrophobic['isalpha']
        beta = hydrophobic['isbeta']
        other = ~alpha & ~beta
        hyd_vec = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(hydrophobic))
        vec += tuple(hyd_vec)

        # Pi-stacking (<7.5A)
        _, pi_rec, pi_paralel, pi_tshaped = pi_stacking(mol, protein, 7.5)
        alpha = pi_rec['isalpha'] & pi_paralel
        beta = pi_rec['isbeta'] & pi_paralel
        other = ~alpha & ~beta & pi_paralel
        pi_vec = (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_vec)

        # T-shaped Pi-Pi interaction; computed here while pi_rec still
        # refers to the pi-stacking result, appended after the pi-cation
        # counts below.
        alpha = pi_rec['isalpha'] & pi_tshaped
        beta = pi_rec['isbeta'] & pi_tshaped
        other = ~alpha & ~beta & pi_tshaped
        pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

        # Pi-cation (<6A)
        pi_rec, _, strict = pi_cation(protein, mol, 6)
        alpha = pi_rec['isalpha'] & strict
        beta = pi_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

        _, cat_rec, strict = pi_cation(mol, protein, 6)
        alpha = cat_rec['isalpha'] & strict
        beta = cat_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_cat_vec)

        # T-shape (perpendicular Pi's) (<7.5A)
        vec += tuple(pi_t_vec)

        # Active site flexibility (<4A); atoms with atomicnum == 1 are
        # excluded on both sides.
        active_site = close_contacts(
            mol_dict[mol_dict['atomicnum'] != 1],
            protein_dict[protein_dict['atomicnum'] != 1],
            cutoff=4)[1]
        backbone = active_site['isbackbone']
        alpha = active_site['isalpha']
        beta = active_site['isbeta']
        other = ~alpha & ~beta
        as_flex = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(active_site))
        vec += tuple(as_flex)

        # Salt bridges (<5.5)
        salt_bridge_dict = salt_bridges(mol, protein, 5.5)[1]
        vec += (salt_bridge_dict['isalpha'].sum(),
                salt_bridge_dict['isbeta'].sum(),
                (~salt_bridge_dict['isalpha'] &
                 ~salt_bridge_dict['isbeta']).sum(),
                len(salt_bridge_dict))

        # Rotatable bonds
        vec += mol.num_rotors,

        rows.append(np.array(vec, dtype=float))

    if not rows:
        # Nothing to describe: return an empty matrix rather than failing.
        return np.zeros((0, 0), dtype=float)
    return np.vstack(rows)
def InteractionFingerprint(ligand, protein, strict=True):
    """Per-residue interaction fingerprint of a protein-ligand complex.

    Every residue id found in the protein gets one row of eight counters,
    one per interaction type:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.
    strict : bool (default = True)
        If False, the 'strict' masks returned for H-bonds and metal
        contacts are ignored and every returned pair is counted.

    Returns
    -------
    InteractionFingerprint : numpy array
        Flattened vector of the counters
        (size = no residues * 8 type of interaction)
    """
    residue_ids = np.unique(protein.atom_dict['resid'])
    fingerprint = np.zeros((len(residue_ids), 8), dtype=np.uint8)

    def tally(resid_values, column):
        # Translate residue ids to row numbers and add one per occurrence.
        rows = np.searchsorted(residue_ids, np.sort(resid_values)[::-1])
        np.add.at(fingerprint, (rows, column), 1)

    # Column 0: hydrophobic contacts
    tally(hydrophobic_contacts(protein, ligand)[0]['resid'], 0)

    # Column 1: aromatic face to face, Column 2: aromatic edge to face
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    tally(rings[strict_parallel]['resid'], 1)
    tally(rings[strict_perpendicular]['resid'], 2)

    # Column 3: h-bonds with the protein as a donor
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donor_filter = None if strict is False else strict0
    tally(donors[donor_filter]['resid'], 3)

    # Column 4: h-bonds with the protein as an acceptor
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptor_filter = None if strict is False else strict1
    tally(acceptors[acceptor_filter]['resid'], 4)

    # Column 5: salt bridges, protein positively charged
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    tally(plus['resid'], 5)

    # Column 6: salt bridges, protein negatively charged
    _, minus = salt_bridge_plus_minus(ligand, protein)
    tally(minus['resid'], 6)

    # Column 7: salt bridges, ionic bond with metal ion
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal_filter = None if strict is False else strict2
    tally(metal[metal_filter]['resid'], 7)

    return fingerprint.flatten()
def SimpleInteractionFingerprint(ligand, protein, strict=True):
    """Based on http://dx.doi.org/10.1016/j.csbj.2014.05.004.
    Every IFP consists of 8 counters per amino acid type
    (One row = one amino acid type) and presents eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Rows follow this pattern:
    '', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
    'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL'. The '' (row 0) means cofactor: every residue name that is not
    in the pattern is counted there. Index of amino acid in the pattern
    corresponds to row in the matrix before flattening.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.
    strict : bool (default = True)
        If False, the 'strict' masks returned for H-bonds and metal
        contacts are ignored and every returned pair is counted.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = 168)
    """
    amino_acids = np.array(['', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN',
                            'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET',
                            'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
                           dtype='<U3')
    IFP = np.zeros((len(amino_acids), 8), dtype=np.uint8)

    def add_counts(resnames, column):
        # Map residue names to rows of the pattern, add one per occurrence.
        rows = np.searchsorted(amino_acids, np.sort(resnames)[::-1])
        np.add.at(IFP, (rows, column), 1)

    # hydrophobic (Column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resname']
    hydrophobic[~np.in1d(hydrophobic, amino_acids)] = ''
    add_counts(hydrophobic, 0)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    # Selecting rings with a mask produces a new array, so the names have to
    # be cleaned on that selection and counted from it; writing into
    # `rings[mask]['resname']` and then re-selecting loses the change.
    parallel_names = rings[strict_parallel]['resname']
    parallel_names[~np.in1d(parallel_names, amino_acids)] = ''
    add_counts(parallel_names, 1)
    perpendicular_names = rings[strict_perpendicular]['resname']
    perpendicular_names[~np.in1d(perpendicular_names, amino_acids)] = ''
    add_counts(perpendicular_names, 2)

    # hbonds donated by the protein (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donors['resname'][~np.in1d(donors['resname'], amino_acids)] = ''
    if strict is False:
        strict0 = None
    add_counts(donors[strict0]['resname'], 3)

    # hbonds donated by the ligand (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptors['resname'][~np.in1d(acceptors['resname'], amino_acids)] = ''
    if strict is False:
        strict1 = None
    add_counts(acceptors[strict1]['resname'], 4)

    # ionic bond with protein cation (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    plus['resname'][~np.in1d(plus['resname'], amino_acids)] = ''
    add_counts(plus['resname'], 5)

    # ionic bond with protein anion (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    minus['resname'][~np.in1d(minus['resname'], amino_acids)] = ''
    add_counts(minus['resname'], 6)

    # ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal['resname'][~np.in1d(metal['resname'], amino_acids)] = ''
    if strict is False:
        strict2 = None
    add_counts(metal[strict2]['resname'], 7)

    return IFP.flatten()
def InteractionFingerprint(ligand, protein, strict=True):
    """Interaction fingerprint accomplished by converting the molecular
    interaction of ligand-protein into an array of counters, indexed by
    residue and interaction type. For every residue (One row = one residue)
    there are eight counters which represent eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.
    strict : bool (default = True)
        If False, the 'strict' masks returned for H-bonds and metal
        contacts are ignored and every returned pair is counted.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = no residues * 8 type of interaction)
    """
    resids = np.unique(protein.atom_dict['resid'])
    IFP = np.zeros((len(resids), 8), dtype=np.uint8)

    # NOTE: every np.add.at call below indexes with a (rows, column) tuple.
    # A list in that position is not a multi-dimensional index for NumPy.

    # hydrophobic contacts (column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resid']
    np.add.at(IFP, (np.searchsorted(resids,
                                    np.sort(hydrophobic)[::-1]), 0), 1)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(rings[strict_parallel]['resid'])[::-1]), 1), 1)
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(rings[strict_perpendicular]['resid'])[::-1]), 2), 1)

    # h-bonds, protein as a donor (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    if strict is False:
        strict0 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(donors[strict0]['resid'])[::-1]), 3), 1)

    # h-bonds, protein as an acceptor (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    if strict is False:
        strict1 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(acceptors[strict1]['resid'])[::-1]), 4), 1)

    # salt bridges, protein positively charged (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    np.add.at(IFP, (np.searchsorted(resids,
                                    np.sort(plus['resid'])[::-1]), 5), 1)

    # salt bridges, protein negatively charged (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    np.add.at(IFP, (np.searchsorted(resids,
                                    np.sort(minus['resid'])[::-1]), 6), 1)

    # salt bridges, ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    if strict is False:
        strict2 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(metal[strict2]['resid'])[::-1]), 7), 1)

    return IFP.flatten()
def SimpleInteractionFingerprint(ligand, protein, strict=True):
    """Based on http://dx.doi.org/10.1016/j.csbj.2014.05.004.
    Every IFP consists of 8 counters per amino acid type
    (One row = one amino acid type) and presents eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Rows follow this pattern:
    '', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
    'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL'. The '' (row 0) means cofactor: every residue name that is not
    in the pattern is counted there. Index of amino acid in the pattern
    corresponds to row in the matrix before flattening.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.
    strict : bool (default = True)
        If False, the 'strict' masks returned for H-bonds and metal
        contacts are ignored and every returned pair is counted.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = 168)
    """
    amino_acids = np.array(['', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN',
                            'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET',
                            'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
                           dtype='<U3')
    IFP = np.zeros((len(amino_acids), 8), dtype=np.uint8)

    # NOTE: every np.add.at call below indexes with a (rows, column) tuple.
    # A list in that position is not a multi-dimensional index for NumPy.

    # hydrophobic (Column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resname']
    hydrophobic[~np.in1d(hydrophobic, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(hydrophobic)[::-1]), 0), 1)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    # Selecting rings with a mask produces a new array, so the names have to
    # be cleaned on that selection and counted from it; writing into
    # `rings[mask]['resname']` and then re-selecting loses the change.
    face_to_face = rings[strict_parallel]['resname']
    face_to_face[~np.in1d(face_to_face, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(face_to_face)[::-1]), 1), 1)
    edge_to_face = rings[strict_perpendicular]['resname']
    edge_to_face[~np.in1d(edge_to_face, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(edge_to_face)[::-1]), 2), 1)

    # hbonds donated by the protein (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donors['resname'][~np.in1d(donors['resname'], amino_acids)] = ''
    if strict is False:
        strict0 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(donors[strict0]['resname'])[::-1]), 3), 1)

    # hbonds donated by the ligand (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptors['resname'][~np.in1d(acceptors['resname'], amino_acids)] = ''
    if strict is False:
        strict1 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(acceptors[strict1]['resname'])[::-1]), 4), 1)

    # ionic bond with protein cation (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    plus['resname'][~np.in1d(plus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(plus['resname'])[::-1]), 5), 1)

    # ionic bond with protein anion (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    minus['resname'][~np.in1d(minus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(minus['resname'])[::-1]), 6), 1)

    # ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal['resname'][~np.in1d(metal['resname'], amino_acids)] = ''
    if strict is False:
        strict2 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(metal[strict2]['resname'])[::-1]), 7), 1)

    return IFP.flatten()
def build(self, ligands, protein=None):
    """Descriptor building method

    Parameters
    ----------
    ligands: array-like
        An array of generator of oddt.toolkit.Molecule objects for which
        the descriptor is computed

    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.
        If none, then the default protein (from constructor) is used.
        Otherwise, protein becomes new global and default protein.

    Returns
    -------
    descs: numpy array, shape=[n_samples, 351]
        An array of binana descriptors, aligned with input ligands.
        If `ligands` yields no molecules, an empty float array of shape
        (0, 0) is returned.
    """
    if protein:
        self.set_protein(protein)
    else:
        protein = self.protein
    protein_dict = protein.atom_dict

    # Constant lookup tables, defined once instead of once per ligand.
    vina_scores = ['vina_affinity', 'vina_gauss1', 'vina_gauss2',
                   'vina_repulsion', 'vina_hydrophobic', 'vina_hydrogen']
    # (receptor atom type, ligand atom type) pairs for the electrostatics.
    ele_types = (
        ('A', 'A'), ('A', 'C'), ('A', 'CL'), ('A', 'F'), ('A', 'FE'),
        ('A', 'HD'), ('A', 'MG'), ('A', 'MN'), ('A', 'N'), ('A', 'NA'),
        ('A', 'OA'), ('A', 'SA'), ('A', 'ZN'), ('BR', 'C'), ('BR', 'HD'),
        ('BR', 'OA'), ('C', 'C'), ('C', 'CL'), ('C', 'F'), ('C', 'HD'),
        ('C', 'MG'), ('C', 'MN'), ('C', 'N'), ('C', 'NA'), ('C', 'OA'),
        ('C', 'SA'), ('C', 'ZN'), ('CL', 'FE'), ('CL', 'HD'), ('CL', 'MG'),
        ('CL', 'N'), ('CL', 'OA'), ('CL', 'ZN'), ('F', 'HD'), ('F', 'N'),
        ('F', 'OA'), ('F', 'SA'), ('F', 'ZN'), ('FE', 'HD'), ('FE', 'N'),
        ('FE', 'OA'), ('HD', 'HD'), ('HD', 'I'), ('HD', 'MG'), ('HD', 'MN'),
        ('HD', 'N'), ('HD', 'NA'), ('HD', 'OA'), ('HD', 'P'), ('HD', 'S'),
        ('HD', 'SA'), ('HD', 'ZN'), ('MG', 'NA'), ('MG', 'OA'), ('MN', 'N'),
        ('MN', 'OA'), ('N', 'N'), ('N', 'NA'), ('N', 'OA'), ('N', 'SA'),
        ('N', 'ZN'), ('NA', 'OA'), ('NA', 'SA'), ('NA', 'ZN'), ('OA', 'OA'),
        ('OA', 'SA'), ('OA', 'ZN'), ('S', 'ZN'), ('SA', 'ZN'), ('A', 'BR'),
        ('A', 'I'), ('A', 'P'), ('A', 'S'), ('BR', 'N'), ('BR', 'SA'),
        ('C', 'FE'), ('C', 'I'), ('C', 'P'), ('C', 'S'), ('CL', 'MN'),
        ('CL', 'NA'), ('CL', 'P'), ('CL', 'S'), ('CL', 'SA'), ('CU', 'HD'),
        ('CU', 'N'), ('FE', 'NA'), ('FE', 'SA'), ('I', 'N'), ('I', 'OA'),
        ('MG', 'N'), ('MG', 'P'), ('MG', 'S'), ('MG', 'SA'), ('MN', 'NA'),
        ('MN', 'P'), ('MN', 'S'), ('MN', 'SA'), ('N', 'P'), ('N', 'S'),
        ('NA', 'P'), ('NA', 'S'), ('OA', 'P'), ('OA', 'S'), ('P', 'S'),
        ('P', 'SA'), ('P', 'ZN'), ('S', 'SA'), ('SA', 'SA'),
    )
    ele_rec_types, ele_lig_types = zip(*ele_types)
    ligand_atom_types = ['A', 'BR', 'C', 'CL', 'F', 'HD', 'I', 'N', 'NA',
                         'OA', 'P', 'S', 'SA']

    # One float vector per ligand; stacked once after the loop instead of
    # re-copying the whole matrix for every ligand.
    rows = []
    for mol in ligands:
        mol_dict = mol.atom_dict
        vec = tuple()

        # Vina
        # TODO: Asynchronous output from vina, push command to score and
        #       retrieve at the end?
        # TODO: Check if ligand has vina scores
        scored_mol = self.vina.score(mol, single=True)[0].data
        vec += tuple([scored_mol[key] for key in vina_scores])

        # Close Contacts (<4A)
        vec += tuple(self.cc_4.build(mol, single=True).flatten())

        # Electrostatics (<4A)
        ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                      'atom_types_ad4')
        ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                      'atom_types_ad4')
        ele = tuple()
        for r_t, m_t in ele_types:
            mol_ele_dict, rec_ele_dict = interactions.close_contacts(
                ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
            if len(mol_ele_dict) and len(rec_ele_dict):
                # NOTE(review): np.sqrt is applied element-wise before the
                # sum over the last axis, so the denominator is the sum of
                # absolute coordinate differences, not the Euclidean
                # distance. Left as-is so existing descriptor values do not
                # shift -- confirm whether this is intended.
                ele += (mol_ele_dict['charge'] *
                        rec_ele_dict['charge'] /
                        np.sqrt((mol_ele_dict['coords'] -
                                 rec_ele_dict['coords'])**2).sum(axis=-1) *
                        138.94238460104697e4).sum(),  # convert to J/mol
            else:
                ele += 0,
        vec += tuple(ele)

        # Ligand Atom Types
        atoms = atoms_by_type(mol_dict, ligand_atom_types, 'atom_types_ad4')
        atoms_counts = [len(atoms[t]) for t in ligand_atom_types]
        vec += tuple(atoms_counts)

        # Close Contacts (<2.5A)
        vec += tuple(self.cc_25.build(mol, single=True).flatten())

        # H-Bonds (<4A)
        hbond_mol, hbond_rec, strict = interactions.hbond(mol, protein, 4)
        # Retain only strict hbonds
        hbond_mol = hbond_mol[strict]
        hbond_rec = hbond_rec[strict]
        backbone = hbond_rec['isbackbone']
        alpha = hbond_rec['isalpha']
        beta = hbond_rec['isbeta']
        other = ~alpha & ~beta
        donor_mol = hbond_mol['isdonor']
        donor_rec = hbond_rec['isdonor']
        hbond_vec = ((donor_mol & backbone & alpha).sum(),
                     (donor_mol & backbone & beta).sum(),
                     (donor_mol & backbone & other).sum(),
                     (donor_mol & ~backbone & alpha).sum(),
                     (donor_mol & ~backbone & beta).sum(),
                     (donor_mol & ~backbone & other).sum(),
                     (donor_rec & backbone & alpha).sum(),
                     (donor_rec & backbone & beta).sum(),
                     (donor_rec & backbone & other).sum(),
                     (donor_rec & ~backbone & alpha).sum(),
                     (donor_rec & ~backbone & beta).sum(),
                     (donor_rec & ~backbone & other).sum())
        vec += tuple(hbond_vec)

        # Hydrophobic contacts (<4A)
        hydrophobic = interactions.hydrophobic_contacts(mol, protein, 4)[1]
        backbone = hydrophobic['isbackbone']
        alpha = hydrophobic['isalpha']
        beta = hydrophobic['isbeta']
        other = ~alpha & ~beta
        hyd_vec = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(hydrophobic))
        vec += tuple(hyd_vec)

        # Pi-stacking (<7.5A)
        _, pi_rec, pi_paralel, pi_tshaped = interactions.pi_stacking(
            mol, protein, 7.5)
        alpha = pi_rec['isalpha'] & pi_paralel
        beta = pi_rec['isbeta'] & pi_paralel
        other = ~alpha & ~beta & pi_paralel
        pi_vec = (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_vec)

        # count T-shaped Pi-Pi interaction; computed here while pi_rec
        # still refers to the pi-stacking result, appended after the
        # pi-cation counts below.
        alpha = pi_rec['isalpha'] & pi_tshaped
        beta = pi_rec['isbeta'] & pi_tshaped
        other = ~alpha & ~beta & pi_tshaped
        pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

        # Pi-cation (<6A)
        pi_rec, _, strict = interactions.pi_cation(protein, mol, 6)
        alpha = pi_rec['isalpha'] & strict
        beta = pi_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

        _, cat_rec, strict = interactions.pi_cation(mol, protein, 6)
        alpha = cat_rec['isalpha'] & strict
        beta = cat_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_cat_vec)

        # T-shape (perpendicular Pi's) (<7.5A)
        vec += tuple(pi_t_vec)

        # Active site flexibility (<4A)
        active_site = interactions.close_contacts(mol_dict, protein_dict,
                                                  4)[1]
        backbone = active_site['isbackbone']
        alpha = active_site['isalpha']
        beta = active_site['isbeta']
        other = ~alpha & ~beta
        as_flex = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(active_site))
        vec += tuple(as_flex)

        # Salt bridges (<5.5)
        salt_bridges = interactions.salt_bridges(mol, protein, 5.5)[1]
        vec += (salt_bridges['isalpha'].sum(),
                salt_bridges['isbeta'].sum(),
                (~salt_bridges['isalpha'] & ~salt_bridges['isbeta']).sum(),
                len(salt_bridges))

        # Rotatable bonds
        vec += mol.num_rotors,

        rows.append(np.array(vec, dtype=float))

    if not rows:
        # Nothing to describe: return an empty matrix rather than failing.
        return np.zeros((0, 0), dtype=float)
    return np.vstack(rows)
def InteractionCheck(ppath, Listoflig, cur_dir):
    """Write a per-ligand residue interaction report for one protein.

    Side effects: stores `ppath` in the module-level `proteinpath` and
    changes the working directory to the directory part of that path.

    Parameters
    ----------
    ppath : str
        Path of the protein PDB file.
    Listoflig : iterable
        Ligand pose objects. Each must provide `PoseNameExt` (path of the
        ligand PDB file to read) and `PoseName` (prefix of the report
        file name).
    cur_dir : str
        Directory containing the 'Fingerprint' folder into which the
        '<PoseName>_ResidueReport.csv' files are written.

    Returns
    -------
    None. If the protein cannot be read, a message is printed, the error
    text is written to 'ErrorLog.txt' in the protein's directory and no
    ligand is processed.
    """
    global proteinpath
    proteinpath = ppath
    os.chdir(os.path.dirname(proteinpath))

    try:
        protein = next(oddt.toolkit.readfile('pdb', proteinpath,
                                             removeHs=False))
        protein.protein = True
    except Exception as e:
        print("Input structure could not be split into protein and ligand. "
              "Please check ligand identifier.")
        # The working directory is now the protein's directory, so the log
        # lands next to the input file.
        with open(os.path.join(os.getcwd(), 'ErrorLog.txt'), 'w') as log:
            log.write(str(e))
        # Without a protein there is nothing left to analyse.
        return

    for ligand_object in Listoflig:
        ligandname = ligand_object.PoseNameExt
        report_name = ligand_object.PoseName + "_ResidueReport.csv"
        path = os.path.join(cur_dir, 'Fingerprint', report_name)
        # Start a fresh report holding only the header line; the
        # InteractionsFile calls below receive this same path.
        with open(path, 'w') as report:
            report.write("Ligand interactions with protein residues\n")

        # Read in and define the reference ligand
        ligand = next(oddt.toolkit.readfile('pdb', ligandname,
                                            removeHs=False))

        # Hydrophobic interactions
        p_hydroph, l_hydroph = interactions.hydrophobic_contacts(
            protein, ligand)
        InteractionsFile(p_hydroph, l_hydroph, path, 'hydrophobic')

        # h bonds
        p_hbonds, l_hbonds, _ = interactions.hbonds(protein, ligand)
        InteractionsFile(p_hbonds, l_hbonds, path, 'hydrogen bond')

        # halogens
        p_halogen, l_halogen, _ = interactions.halogenbonds(protein, ligand)
        InteractionsFile(p_halogen, l_halogen, path, 'halogen bond')

        # pistacking bonds: the first two returned items are the two ring
        # sets; the remaining items are the parallel/perpendicular masks
        # and must not be passed as the ligand side.
        p_rings, l_rings = interactions.pi_stacking(protein, ligand)[:2]
        InteractionsFile(p_rings, l_rings, path, 'pi stacking')

        # salt bridges
        p_salt_bridges, l_salt_bridges = interactions.salt_bridges(
            protein, ligand)
        InteractionsFile(p_salt_bridges, l_salt_bridges, path, 'salt bridge')

        # pi_cation
        p_pi_cation, l_pi_cation, _ = interactions.pi_cation(protein, ligand)
        InteractionsFile(p_pi_cation, l_pi_cation, path, 'pi cation')

        # acceptor_metal bonds
        p_acceptor_metal_a, acceptor_metal_a, _ = interactions.acceptor_metal(
            protein, ligand)
        InteractionsFile(p_acceptor_metal_a, acceptor_metal_a, path,
                         'acceptor metal')

        # pi_metal bonds
        p_pi_metal, l_pi_metal, _ = interactions.pi_metal(protein, ligand)
        InteractionsFile(p_pi_metal, l_pi_metal, path, 'pi metal')