def get_interface(st, dist):
    """Detect the interface residues of each chain within a cutoff distance.

    A residue is part of the interface when one of its non-hydrogen atoms is
    reported by ``NeighborSearch.search_all(dist)`` together with an atom
    whose residue belongs to a different chain.
    Assumes two chains, i.e. a unique interface set per chain.

    Only the first model (``st[0]``) is analysed. The result dictionary is
    keyed by the chains of that model, so the atom selection is restricted
    to the same model; otherwise copies of one chain in different models of
    a multi-model file could be paired with each other.

    :param st: structure object; ``st[0]`` must be its first model.
    :param dist: distance cutoff handed to ``search_all``.
    :return: dict mapping each chain id of the first model to the set of its
        interface residues (empty set when the chain has none).
    """
    model = st[0]

    # Sets are more efficient than lists here: order is not relevant and the
    # same residue is normally reported for many atom pairs.
    interface = {ch.id: set() for ch in model}

    # Skip hydrogens to reduce search time.
    select_ats = [at for at in model.get_atoms() if at.element != 'H']
    if not select_ats:
        # Nothing to search: every chain keeps an empty interface set.
        return interface

    nbsearch = NeighborSearch(select_ats)
    for at1, at2 in nbsearch.search_all(dist):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        ch1 = res1.get_parent()
        ch2 = res2.get_parent()
        # Only contacts between different chains count.
        if ch1 != ch2:
            interface[ch1.id].add(res1)
            interface[ch2.id].add(res2)
    return interface
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Runs twenty rounds with one hundred stand-in atoms each, checks that
    ``search_all`` yields a list, and finally verifies that a point far from
    the last round's atoms has no neighbours at any level.
    Based on the self test in Bio.PDB.NeighborSearch.
    """
    # This comment stops black style adding a blank line here, which causes flake8 D202.
    class RandomAtom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)

    # Far away from our random atoms
    x = array([250, 250, 250])
    for level in ("A", "R", "C", "M", "S"):
        self.assertEqual([], ns.search(x, 5.0, level))
def getInterface(struct, useCA=True, res2res_dist=8):
    """Collect, per chain, the ids of residues that touch another chain.

    :param struct: structure object; only ``struct[0]`` is used.
    :param useCA: when true, only atoms whose name starts with "CA" are
        searched; otherwise every atom whose name does not start with "H".
    :param res2res_dist: cutoff given to ``search_all`` at residue level.
    :return: ``{chainId: {"bindingSite": [residue ids]}}`` for every chain of
        the first model. An id is appended once per reported residue pair, so
        the lists may contain repeats.
    """
    model = struct[0]

    selected = []
    for atom in model.get_atoms():
        if useCA:
            keep = atom.name.startswith("CA")
        else:
            keep = not atom.name.startswith("H")
        if keep:
            selected.append(atom)

    chains = model.child_list
    residuePairs = NeighborSearch(selected).search_all(res2res_dist, level="R")

    bindingPerChain = {}
    for chain in chains:
        bindingPerChain[chain.get_id()] = {"bindingSite": []}

    for resA, resB in residuePairs:
        _, _, chainA, _ = resA.get_full_id()
        _, _, chainB, _ = resB.get_full_id()
        if chainA == chainB:
            # Intra-chain neighbours are not part of a binding site.
            continue
        bindingPerChain[chainA]["bindingSite"].append(resA.get_id())
        bindingPerChain[chainB]["bindingSite"].append(resB.get_id())
    return bindingPerChain
def compute_interactions(pdb_name):
    """Return the classified residue-residue interactions of a structure.

    The entry name is lowercased, the Biopython structure is loaded with
    ``pdb_get_structure`` and the atoms of its first chain are searched for
    residue pairs within 4.5. Pairs are kept when both residues pass
    ``is_aa`` and their sequence numbers differ by more than
    ``NUM_SKIP_RESIDUES``; of those, only pairs for which at least one
    interaction type was found are returned.
    """
    entry_name = pdb_name.lower()

    db_structure = Structure.objects.get(protein_conformation__protein__entry_name=entry_name)

    # NOTE(review): the preferred chain is read here but the search below
    # uses the first chain of the parsed structure - confirm this is intended.
    preferred_chain = db_structure.preferred_chain.split(',')[0]

    bio_structure = pdb_get_structure(entry_name)
    first_chain = next(bio_structure.get_chains())
    atoms = Selection.unfold_entities(first_chain, 'A')

    residue_pairs = NeighborSearch(atoms).search_all(4.5, "R")

    classified = []
    for res_a, res_b in residue_pairs:
        # Drop pairs containing non amino acid residues.
        if not (is_aa(res_a) and is_aa(res_b)):
            continue
        # Drop pairs that are NUM_SKIP_RESIDUES or fewer sequence steps apart.
        if abs(res_a.id[1] - res_b.id[1]) <= NUM_SKIP_RESIDUES:
            continue
        candidate = InteractingPair(res_a, res_b, get_interactions(res_a, res_b))
        # Unclassified pairs (no interaction type found) are discarded.
        if len(candidate.get_interactions()) > 0:
            classified.append(candidate)
    return classified
def findNeigChains(struct, chainIdL, chainIdR, res2res_dist=6, minContacts=20):
    """Grow the ligand and receptor chain sets with chains contacting them.

    Chain groups come from a chain-level neighbour search over the
    non-hydrogen atoms of ``struct[0]``. A group is considered only when the
    residue-level search over its own atoms (waters named "HOH" excluded)
    reports at least ``minContacts`` residue pairs spanning two chains.

    :param struct: structure object; only ``struct[0]`` is used.
    :param chainIdL: id of the ligand seed chain.
    :param chainIdR: id of the receptor seed chain.
    :param res2res_dist: cutoff used for both searches.
    :param minContacts: minimum number of inter-chain residue pairs.
    :return: ``(ligandChains, receptorChains)`` as sets of chain objects.
    """
    model = struct[0]
    heavyAtoms = [
        atom for atom in model.get_atoms()
        if atom is not None and not atom.name.startswith("H")
    ]
    chainGroups = NeighborSearch(heavyAtoms).search_all(res2res_dist, level="C")

    chainL = model[chainIdL]
    chainR = model[chainIdR]
    ligandChains = {chainL}
    receptorChains = {chainR}
    addedChains = ligandChains | receptorChains

    for neigsGroup in chainGroups:
        groupAtoms = []
        for chain in neigsGroup:
            for atom in chain.get_atoms():
                if atom is None or atom.name.startswith("H"):
                    continue
                if atom.get_parent().resname == "HOH":
                    continue
                groupAtoms.append(atom)

        residuePairs = NeighborSearch(groupAtoms).search_all(res2res_dist, level="R")
        numContacts = sum(
            1 for resA, resB in residuePairs
            if resA.get_parent().get_id() != resB.get_parent().get_id()
        )
        if numContacts < minContacts:
            continue

        touchesL = chainL in neigsGroup
        touchesR = chainR in neigsGroup
        if not (touchesL or touchesR):
            continue

        for chain in neigsGroup:
            if chain in addedChains:
                continue
            if touchesR and touchesL:
                # Group touches both seeds: the side is picked at random.
                target = receptorChains if bool(random.getrandbits(1)) else ligandChains
            elif touchesR:
                target = receptorChains
            else:
                target = ligandChains
            target.add(chain)
            addedChains.add(chain)
    return ligandChains, receptorChains
def getPairsOfResiduesInContact(self, structureL, structureR):
    '''
    Finds which residues of the ligand are in contact with which residues of
    the receptor, using a residue-level neighbour search with cutoff
    self.res2res_dist over the non-hydrogen atoms of both first models.

    @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
    @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
    @return positiveContacts: Set {(full residue id from structureL, full residue id from structureR)}
    @return chainsNotContactL: Set {chain id of structureL} for chains without any contact
    @return chainsNotContactR: Set {chain id of structureR} for chains without any contact
    @raise NoValidPDBFile: if collecting the atoms of a structure raises IndexError
    '''
    heavyAtoms = []
    inputs = ((structureL, "Problems parsing pdbFile 1"),
              (structureR, "Problems parsing pdbFile 2"))
    for structure, errorMsg in inputs:
        try:
            heavyAtoms += [atom for atom in structure.child_list[0].get_atoms()
                           if not atom.name.startswith("H")]
        except IndexError:
            raise NoValidPDBFile(errorMsg)

    residuePairs = NeighborSearch(heavyAtoms).search_all(self.res2res_dist, level="R")

    ligandId = structureL.get_id()
    receptorId = structureR.get_id()
    positiveContacts = set()
    chainsInContactL = set()
    chainsInContactR = set()
    for resA, resB in residuePairs:
        fullIdA = resA.get_full_id()
        fullIdB = resB.get_full_id()
        structA, _, _, _ = fullIdA
        structB, _, _, _ = fullIdB
        # Orient the pair as (ligand, receptor); skip pairs inside one structure.
        if structA == ligandId and structB == receptorId:
            ligandFullId, receptorFullId = fullIdA, fullIdB
        elif structA == receptorId and structB == ligandId:
            ligandFullId, receptorFullId = fullIdB, fullIdA
        else:
            continue
        positiveContacts.add((ligandFullId, receptorFullId))
        chainsInContactL.add(ligandFullId[2])
        chainsInContactR.add(receptorFullId[2])

    if CONSIDER_HOMOOLIG_AS_POS:
        positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
            structureL, structureR, positiveContacts, chainsInContactL,
            chainsInContactR)

    allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
    allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
    chainsNotContactL = allChainsL.difference(chainsInContactL)
    chainsNotContactR = allChainsR.difference(chainsInContactR)
    return positiveContacts, chainsNotContactL, chainsNotContactR
def getPairsOfResiduesInContact(self, structureL, structureR):
    '''
    Finds which residues of the ligand are in contact with which residues of
    the receptor, using a residue-level neighbour search with cutoff
    self.res2res_dist over the non-hydrogen atoms of both first models.

    :param structureL: Bio.PDB.Structure. Structure of ligand unbound state if available
    :param structureR: Bio.PDB.Structure. Structure of receptor unbound state if available.
    :return positiveContactsResidues, chainsNotContactL, chainsNotContactR
        positiveContactsResidues: Set {(residue object of structureL, residue object of structureR)}
        chainsNotContactL: Set {chain id} for ligand chains that are not in contact
        chainsNotContactR: Set {chain id} for receptor chains that are not in contact
    :raise NoValidPDBFile: if collecting the atoms of a structure raises IndexError
    '''
    try:
        atomListL = []
        for atom in structureL.child_list[0].get_atoms():
            if atom.name.startswith("H"):
                continue
            atomListL.append(atom)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 1")

    try:
        atomListR = []
        for atom in structureR.child_list[0].get_atoms():
            if atom.name.startswith("H"):
                continue
            atomListR.append(atom)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 2")

    residuePairs = NeighborSearch(atomListL + atomListR).search_all(
        self.res2res_dist, level="R")

    ligandId = structureL.get_id()
    receptorId = structureR.get_id()
    positiveContactsResidues = set()
    chainsInContactL = set()
    chainsInContactR = set()
    for resA, resB in residuePairs:
        structA, _, chainA, _ = resA.get_full_id()
        structB, _, chainB, _ = resB.get_full_id()
        # Store every inter-structure pair oriented as (ligand, receptor).
        if structA == ligandId and structB == receptorId:
            positiveContactsResidues.add((resA, resB))
            chainsInContactL.add(chainA)
            chainsInContactR.add(chainB)
        elif structA == receptorId and structB == ligandId:
            positiveContactsResidues.add((resB, resA))
            chainsInContactL.add(chainB)
            chainsInContactR.add(chainA)

    allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
    allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
    chainsNotContactL = allChainsL.difference(chainsInContactL)
    chainsNotContactR = allChainsR.difference(chainsInContactR)
    return positiveContactsResidues, chainsNotContactL, chainsNotContactR
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Runs twenty rounds with one hundred stand-in atoms each and checks that
    ``search_all`` returns a list.
    Based on the self test in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        # The previous check compared the returned list itself with 0 through
        # the removed ``assert_`` alias; check the type and the length instead.
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)
def quick_neighbor_search_test():
    """Smoke-test NeighborSearch on randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch. Twenty rounds are run,
    each with one hundred stand-in atoms, and "Done" is printed at the end.

    Raises:
        AssertionError: if ``search_all`` does not return a list.
    """
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        al = [Atom() for _ in range(100)]
        ns = NeighborSearch(al)
        hits = ns.search_all(5.0)
        # ``hits`` is a list of pairs: comparing the list itself with 0 is
        # meaningless (and a TypeError on Python 3), so check type and length.
        assert isinstance(hits, list), hits
        assert len(hits) >= 0
    # Function-call form works on both Python 2 and Python 3.
    print("Done")
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Twenty independent rounds; each builds one hundred stand-in atoms and
    checks that ``search_all`` hands back a list.
    Based on the self test in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    rounds = 20
    atoms_per_round = 100
    for _ in range(rounds):
        atoms = []
        for _ in range(atoms_per_round):
            atoms.append(RandomAtom())
        hits = NeighborSearch(atoms).search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)
def quick_neighbor_search_test():
    """Smoke-test NeighborSearch on randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch. Twenty rounds are run,
    each with one hundred stand-in atoms, and "Done" is printed at the end.

    Raises:
        AssertionError: if ``search_all`` does not return a list.
    """
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = (100 * random(3))

        def get_coord(self):
            return self.coord

    for _ in range(20):
        al = [Atom() for _ in range(100)]
        ns = NeighborSearch(al)
        hits = ns.search_all(5.0)
        # ``hits`` is a list of pairs: comparing the list itself with 0 is
        # meaningless (and a TypeError on Python 3), so check type and length.
        assert isinstance(hits, list), hits
        assert len(hits) >= 0
    # Function-call form works on both Python 2 and Python 3.
    print("Done")
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Checks that ``search_all`` returns a list for twenty random atom sets and
    that a distant query point has no neighbours at any of the five levels.
    Based on the self test in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = []
        for _ in range(100):
            atoms.append(RandomAtom())
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)

    # Far away from our random atoms; queried against the last round's tree.
    x = array([250,250,250])
    for level in "ARCMS":
        self.assertEqual([], ns.search(x, 5.0, level))
def get_backbone_links(struc, backbone_atoms, covlnk, join_models=True):
    """Get links making the main chain.

    For every model, atoms whose id is in ``backbone_atoms`` are collected
    (the selected child is used for disordered atoms) and searched for pairs
    within ``covlnk``. Pairs inside one residue are skipped; when
    ``join_models`` is false, pairs whose residues fail ``same_model`` are
    skipped as well. A warning is printed for a model with no such atoms.

    Returns a list of two-atom lists, each sorted by serial number.
    """
    # TODO differentiate Protein and NA
    links = []
    for model in struc:
        selected = [
            atom.selected_child if atom.disordered_flag else atom
            for atom in struc[model.id].get_atoms()
            if atom.id in backbone_atoms
        ]
        if not selected:
            print("Warning: No backbone atoms defined")
            continue

        for at1, at2 in NeighborSearch(selected).search_all(covlnk):
            if same_residue(at1, at2):
                continue
            if not join_models and not same_model(at1.get_parent(), at2.get_parent()):
                continue
            links.append(sorted([at1, at2], key=lambda atom: atom.serial_number))
    return links
def get_contacts(atoms=None):
    """Build the pairwise contact map of a list of atoms.

    Atom pairs closer than ``SEARCH_RADIUS`` are looked up with
    ``NeighborSearch`` and keyed by their serial numbers. The contact map
    then holds one entry per unordered pair of positions in ``atoms``:
    ``contact_map[(x, y)]`` with ``0 <= x < y < n`` is 1 when the atoms at
    those positions are neighbours and 0 otherwise, i.e. n(n-1)/2 entries,
    excluding the diagonal and the symmetric half.

    :param atoms: list of atoms exposing ``get_serial_number()``; ``None``
        or an empty list yields an empty map.
    :return: ``(contact_map, no_of_native_contacts)`` where the second item
        is the number of distinct serial-number pairs reported as neighbours.
    """
    # No mutable default argument, and nothing to search for an empty input.
    if atoms is None or len(atoms) == 0:
        return {}, 0

    neighbor_search = NeighborSearch(atoms)
    neighbors = neighbor_search.search_all(radius=SEARCH_RADIUS)

    # neighbors_map: {(serial_i, serial_j): 1} for every neighbouring pair.
    neighbors_map = {
        (first.get_serial_number(), second.get_serial_number()): 1
        for first, second in neighbors
    }
    no_of_native_contacts = len(neighbors_map)

    # Read each serial number once instead of once per pair.
    serials = [atom.get_serial_number() for atom in atoms]
    n = len(serials)

    contact_map = {}
    # Positions are 0-based: starting at 0 keeps the pairs of the first atom,
    # which a loop starting at 1 would drop.
    for x in range(n - 1):
        for y in range(x + 1, n):
            contact_map[(x, y)] = neighbors_map.get((serials[x], serials[y]), 0)
    return contact_map, no_of_native_contacts
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Exercises ``search_all`` on twenty random atom sets, then confirms that a
    query point far from the last set has no neighbours at any level.
    Based on the self test in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        # Minimal atom stand-in: only get_coord() is provided.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    levels = ("A", "R", "C", "M", "S")
    for _ in range(20):
        atoms = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)

    # Far away from our random atoms
    x = array([250, 250, 250])
    for level in levels:
        self.assertEqual([], ns.search(x, 5.0, level))
def compute_interactions(pdb_name,save_to_db = False):
    """Compute residue interactions and CA-CA distances for one structure.

    The structure record is looked up by (lowercased) entry name, its stored
    PDB text is parsed with Biopython, and the first preferred chain of the
    first model is analysed. Residues of that chain whose sequence number has
    no database residue with a generic number are detached first; residues
    named "HOH" are kept.

    Args:
        pdb_name: Entry name of the structure.
        save_to_db: When true, stored InteractingResiduePair rows of the
            structure are deleted, every classified pair is saved through
            ``save_into_database`` and the distances are bulk-created as
            Distance rows.

    Returns:
        ``(classified, distances)``. ``classified`` holds the InteractingPair
        objects for which at least one interaction was found. ``distances``
        holds tuples ``(db_res1, db_res2, distance, label1, label2)`` for
        every pair of non-water residues of the chain.
    """

    # Both stages are always enabled here; the flags only gate the code below.
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)

    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB; [0] selects the first model
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    chain = s[preferred_chain]

    # Keep only residues that have a generic number in the database.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')
    dbres = {}  # sequence number -> database residue
    dblabel = {}  # sequence number -> generic number label
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect first, detach afterwards: the chain is not modified while it is
    # being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1,res1 in enumerate(chain,1):
            if not is_water(res1):
                for i2,res2 in enumerate(chain,1):
                    # i2 > i1: do not calculate a pair twice.
                    if i2>i1 and not is_water(res2):
                        # Subtracting the two CA atoms gives their distance.
                        distance = res1['CA']-res2['CA']
                        distances.append((dbres[res1.id[1]],dbres[res2.id[1]],distance,dblabel[res1.id[1]],dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]

        # Only include contacts between residues MORE than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [InteractingPair(res_pair[0], res_pair[1], get_interactions(res_pair[0], res_pair[1]),dbres[res_pair[0].id[1]], dbres[res_pair[1].id[1]],struc) for res_pair in all_aa_neighbors]

        # Keep only the classified pairs (at least one interaction found).
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # Create interaction dictionary keyed "<chain><resnum>_<chain><resnum>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            # Obtain the set of water atoms of the chain
            water_list = { water for residue in s[preferred_chain] if residue.get_resname() == "HOH" for water in residue.get_atoms() }
            if len(water_list) > 0:
                # For every water atom, the non-water residues within 3.5
                water_neighbors = [(water, match_res) for water in water_list
                                   for match_res in ns.search(water.coord, 3.5, "R") if not is_water(match_res)]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [ index for index in range(index_one+1, len(water_neighbors)) if water_pair_one[0]==water_neighbors[index][0] ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                        # Check if interaction is polar - NOTE: this is not capturing every angle
                        if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                            # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                            if key in interaction_pairs:
                                interaction_pairs[key].interactions.append(WaterMediated())
                            else:
                                # NOTE(review): a pair created here lives only
                                # in interaction_pairs; the save loop below
                                # iterates `classified`, so it is not saved -
                                # confirm whether that is intended.
                                interaction_pairs[key] = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)

            for p in classified:
                p.save_into_database()

        if do_distances:
            # Existing Distance rows of the structure are not deleted here.
            bulk_distances = []
            for i,d in enumerate(distances):
                # The distance is stored as an integer number of hundredths.
                distance = Distance(distance=int(100*d[2]),res1=d[0], res2=d[1],gn1=d[3], gn2=d[4], gns_pair='_'.join([d[3],d[4]]), structure=struc)
                bulk_distances.append(distance)
                # Flush in batches so the pending list stays small.
                if len(bulk_distances)>1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
#
# Simple program to search contacts: lists every atom of 1ubq.pdb and then
# every pair of atoms closer than HBLNK.
from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.PDBParser import PDBParser

HBLNK = 3.5  # Define distance for a contact

parser = PDBParser(PERMISSIVE=1)
st = parser.get_structure('estructura', '1ubq.pdb')

# Selecting all atoms.
selecc = list(st.get_atoms())
for at in selecc:
    print("ATOM:", at)

nbsearch = NeighborSearch(selecc)

print("NBSEARCH:")

# Searching for contacts under HBLNK; ncontact numbers them from 1.
ncontact = 1
for at1, at2 in nbsearch.search_all(HBLNK):
    print("Contact: ", ncontact)
    for label, atom in (("at1", at1), ("at2", at2)):
        print(label, atom, atom.get_serial_number(),
              atom.get_parent().get_resname())
    print()
    ncontact += 1
# Collect the polar atoms, then keep for every residue pair the closest
# atom pair found within HBLNK.
polar_ats = []
for at in st.get_atoms():
    res = at.get_parent()
    het_flag = res.id[0]
    # NOTE(review): `and` binds tighter than `or`, so 'H_' residues are always
    # skipped and only 'W' residues depend on --hetatm; the parentheses just
    # make that existing precedence explicit - confirm it is intended.
    if het_flag.startswith('H_') or (het_flag.startswith('W') and not args.hetatm):
        continue
    if at.element in POLAR:
        polar_ats.append(at)
print (len(polar_ats), 'POLAR Atoms found')

# Preparing search
nbsearch = NeighborSearch(polar_ats)
at_pairs = nbsearch.search_all(HBLNK)

hbs = {}
for at1, at2 in sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number):
    res1 = at1.get_parent()
    res2 = at2.get_parent()
    # Keep only pairs whose second residue is at least two positions after
    # the first (drops same residue and next in sequence).
    if res2.id[1] - res1.id[1] >= 2:
        dist = at1 - at2
        partners = hbs.setdefault(res1, {})
        best = partners.get(res2)
        # Use the contact with the shortest distance between the residues.
        if best is None or dist < best[2]:
            partners[res2] = (at1, at2, dist)
def _get_contacts(ats_list, d_cutoff):
    """Return ``(residue1, residue2, distance)`` for each close atom pair.

    One tuple is produced per atom pair reported by ``search_all(d_cutoff)``;
    the residues are the parents of the two atoms and the distance is the
    result of subtracting them.
    """
    searcher = NeighborSearch(ats_list)
    return [
        (first.get_parent(), second.get_parent(), first - second)
        for first, second in searcher.search_all(d_cutoff)
    ]
def main():
    """Command-line entry point of the polar contacts detector.

    Parses the options, loads the first model of the given PDB file, selects
    the polar atoms (backbone only with --backonly) and prints every pair of
    them within HBLNK that is not in the same residue, not closer than
    COVLNK and not in residues with consecutive numbers. With --nowats,
    pairs involving a residue named in ``waternames`` are dropped too.
    Exits with status 2 when the file cannot be loaded.
    """
    parser = argparse.ArgumentParser(
        prog='polarContacts',
        description='Polar contacts detector'
    )
    switches = (
        ('--backonly', 'backonly', 'Restrict to backbone'),
        ('--nowats', 'nowats', 'Exclude water molecules'),
    )
    for flag, dest, help_text in switches:
        parser.add_argument(flag, action='store_true', dest=dest, help=help_text)
    parser.add_argument('pdb_path')

    args = parser.parse_args()

    print ("Settings")
    print ("--------")
    for name, value in vars(args).items():
        print ('{:10}:'.format(name), value)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # From here on `parser` is the PDB parser, not the argument parser.
    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print ("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print ("#WARNING: Several Models found, using only first")

    # Using Model 0 any way
    st = st[0]

    # Making a list of polar atoms
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        if res1 == res2:
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(res2.id[1] - res1.id[1]) == 1:
            continue
        # remove waters
        if nowats and (res1.get_resname() in waternames
                       or res2.get_resname() in waternames):
            continue
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            first, second = at1, at2
        else:
            first, second = at2, at1
        hblist.append([StructureWrapper.Atom(first, 1), StructureWrapper.Atom(second, 1)])

    print ()
    print ("Polar contacts")
    print ('{:13} ({:4}, {:6}) {:13} ({:4}, {:6}) {:6} '.format(
        'Atom1', 'Type', 'Charge', 'Atom2', 'type', 'charge', 'Dist (A)'))
    ordered = sorted(hblist, key=lambda pair: pair[0].at.get_serial_number())
    for first, second in ordered:
        print ('{:14} {:14} {:6.3f} '.format(
            first.atid(),
            second.atid(),
            first.at - second.at))
# Lists every pair of candidate backbone atoms from different residues that
# lie within COVLNK of each other.
parser = PDBParser(PERMISSIVE=1)

print ('Parsing', args.pdb_file)
# load structure from PDB file of PDB file handler
st = parser.get_structure('STR', args.pdb_file.name)

# collecting atom candidates
bck_atoms = []
for at in st.get_atoms():
    if at.id not in PEP_BOND_ATS:
        continue
    bck_atoms.append(at)

print (len(bck_atoms), 'candidate atoms found')

# Preparing search
nbsearch = NeighborSearch(bck_atoms)
at_pairs = nbsearch.search_all(COVLNK)

# Output sorted by the serial number of the first atom of each pair.
# Redirect the output with > output_list
ordered_pairs = sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number)
for at1, at2 in ordered_pairs:
    # Only pairs from different residues are reported.
    if at1.get_parent() != at2.get_parent():
        print ('{:11} : {:11} {:8.3f}'.format(atom_id(at1), atom_id(at2), at1 - at2))
def main():
    """Command-line entry point: polar contacts, optional plot and energies.

    Parses the options, loads the VDW parameters and the residue library,
    reads the first model of the PDB file and lists the polar atom pairs
    within HBLNK that are in different, non-consecutive residues and not
    closer than COVLNK. For every residue pair involved it then prints the
    electrostatic, VDW and total interaction energies. ``--plot`` draws a bar
    chart of contacts per residue, ``--index N`` prints the N pairs with the
    lowest total energy, ``--surf`` runs NACCESS.

    NOTE(review): the block nesting below was reconstructed from a flattened
    copy of this function; the spots where more than one reading is possible
    are marked - confirm them against the original file.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument(
        # Argument to plot the number of polar contacts that each residue has.
        # NOTE(review): the help text below says 'Restrict to sidechain',
        # which does not describe what plotMode does in this function.
        '--plot',
        action='store_true',
        dest='plotMode',
        help='Restrict to sidechain')
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')
    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')
    parser.add_argument(
        '--index',
        action='store',
        dest='index',
        help='Only select the molecules with more stable contact by an index')
    parser.add_argument(
        '--surf',
        action='store_true',
        dest='surf',
        help='Usa ASA',
    )
    parser.add_argument('pdb_path')

    args = parser.parse_args()

    # Echo the effective settings.
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    plotMode = args.plotMode
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    index = args.index
    surf = args.surf
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # From here on `parser` is the PDB parser, not the argument parser.
    parser = PDBParser(PERMISSIVE=1)

    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for model
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")

    # Using Model 0 any way
    st = st[0]

    # Getting surfaces
    # NOTE(review): the NACCESS binary path is hard-coded to one user's
    # machine, so --surf only works where that path exists.
    if surf:
        res_surfaces = NACCESS(
            st,
            naccess_binary=
            '/Users/daniel/Downloads/BioPhysics-energies0/NACCESS/naccess')
        at_surfaces = NACCESS_atomic(
            st,
            naccess_binary=
            '/Users/daniel/Downloads/BioPhysics-energies0/NACCESS/naccess')
        print("Surfaces obtained from NACCESS")

    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars

    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                or at2.get_parent().get_resname() in waternames:
                continue
        # NOTE(review): atom1/atom2 are built but never used afterwards.
        atom1 = Atom(at1, 1, aaLib, vdwParams)
        atom2 = Atom(at2, 1, aaLib, vdwParams)
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))

    if plotMode:
        # Dictionaries keyed by residue number; the values count the polar
        # contacts of that residue on the sidechain and on the mainchain.
        sidecontacts = {}
        maincontacts = {}
    # Labels of the residues involved in polar contacts; a set avoids
    # repeated residues.
    # NOTE(review): placed outside the `if plotMode` block because the loop
    # below adds to it unconditionally - confirm the original nesting.
    res = set(
    )

    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        res.add(str(r1.id[1]) + r1.get_resname())
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
        # NOTE(review): hblist holds the atoms yielded by search_all, yet this
        # branch calls .atid() and reads .at on them - it looks like a
        # leftover from a wrapper-based version; confirm before using --surf.
        if surf:
            print('{:14} ({:6.3f}) {:14} ({:6.3f}) {:6.3f} '.format(
                hb[0].atid(), float(hb[0].at.xtra['EXP_NACCESS']),
                hb[1].atid(), float(hb[1].at.xtra['EXP_NACCESS']),
                hb[0].at - hb[1].at))
        if plotMode:
            # Only the first residue of the pair (r1) is counted. A contact is
            # "mainchain" when both atom ids are in backbone_polars. Whenever a
            # residue is first seen, the other dictionary gets a 0 entry.
            if hb[0].id in backbone_polars and hb[1].id in backbone_polars:
                if r1.id[1] not in maincontacts:
                    maincontacts[r1.id[1]] = 1
                    if r1.id[1] not in sidecontacts:
                        sidecontacts[r1.id[1]] = 0
                else:
                    maincontacts[r1.id[1]] += 1
            else:
                if r1.id[1] not in sidecontacts:
                    if r1.id[1] not in maincontacts:
                        maincontacts[r1.id[1]] = 0
                    sidecontacts[r1.id[1]] = 1
                else:
                    sidecontacts[r1.id[1]] += 1

    if plotMode:
        # Plot the number of polar contacts of each residue on a bar graph.
        # Contacts on mainchain are blue bars, contacts on sidechain red bars
        N = len(res)
        nindex = np.arange(N)
        main_con = maincontacts.values()
        side_con = sidecontacts.values()
        fig, ax = plt.subplots()
        bar_width = 0.35
        rec1 = ax.bar(nindex,
                      main_con,
                      bar_width,
                      color='b',
                      label='Mainchain contacts')
        rec2 = ax.bar(nindex + bar_width,
                      side_con,
                      bar_width,
                      color='r',
                      label='Sidechain contacts')
        ax.set_xlabel('AminoAcid Number')
        ax.set_ylabel('Number of polar contacts')
        ax.set_xticks(nindex + bar_width / 2, res)
        ax.set_xticklabels(res)
        ax.legend()
        fig.tight_layout()
        plt.show()

    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = Residue(hb[0].get_parent(), 1, aaLib, vdwParams)
        r2 = Residue(hb[1].get_parent(), 1, aaLib, vdwParams)
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    # One row per residue pair; the total energy is the last item (index 6).
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].resNum()):
        eint = rpair[0].elecInt(rpair[1], diel)
        evdw = rpair[0].vdwInt(rpair[1])
        print('{:10} {:10} {: 8.4f} {: 8.4f} {: 8.4f}'.format(
            rpair[0].resid(), rpair[1].resid(), eint, evdw, eint + evdw))
        l.append([
            rpair[0].resid(), rpair[0], rpair[1].resid(), rpair[1], eint, evdw,
            eint + evdw
        ])
    # NOTE(review): the two blocks below are placed after the loop; the
    # flattened source does not show whether they belong inside it. As
    # written, the --surf block only sees the last residue pair.
    if index is not None:
        # Select only the pairs with the lowest energy (the most stable ones)
        for e, element in enumerate(sorted(l, key=lambda i: i[6])):
            if e < int(index):
                print(element)
    if surf:
        srfr1 = float(rpair[0].residue.xtra['EXP_NACCESS']['all_polar_rel'])
        srfr2 = float(rpair[1].residue.xtra['EXP_NACCESS']['all_polar_rel'])
        # A relative surface above 30 on either residue switches the
        # dielectric constant to 80.0; otherwise --diel is used.
        if (srfr1 > 30.) or (srfr2 > 30.):
            diel0 = 80.0
        else:
            diel0 = diel
        eint = rpair[0].elecInt(rpair[1], diel0)
        print(
            '{:10} ({:>8.3f}) {:10} ({:>8.3f}) (e: {:4.1f}) {:>8.4f} {:>8.4f} {:>8.4f}'
            .format(rpair[0].resid(), srfr1, rpair[1].resid(), srfr2, diel0,
                    eint, evdw, eint + evdw))
# load structure from PDB file
st = parser.get_structure('1UBQ', '1ubq.pdb')

# Select only CA atoms, listing each one as it is picked.
select = []
for at in st.get_atoms():
    if at.id != 'CA':
        continue
    residue = at.get_parent()
    select.append(at)
    print("ATOM:", residue.get_resname(), residue.id[1], at.id)

# Preparing search
nbsearch = NeighborSearch(select)

print("NBSEARCH:")

# Searching for contacts under MAXDIST; ncontact numbers them from 1.
ncontact = 1
for at1, at2 in nbsearch.search_all(MAXDIST):
    print("Contact: ", ncontact)
    for label, atom in (("at1", at1), ("at2", at2)):
        print(label, atom, atom.get_serial_number(),
              atom.get_parent().get_resname())
    print()
    ncontact += 1
def main():
    """Detect polar contacts in a PDB structure and analyse their energies.

    Command line: ``pdb_path`` plus the optional ``--backonly``, ``--nowats``,
    ``--diel``, ``--vdw`` and ``--rlib`` switches.

    Steps performed, all reported on stdout / matplotlib windows:

    1. List polar atom pairs returned by ``search_all(HBLNK)``, discarding
       pairs in the same residue, pairs closer than ``COVLNK`` and pairs whose
       residue numbers differ by exactly one (and waters when ``--nowats``).
    2. Print electrostatic, van der Waals and total energy per residue pair.
    3. Print and bar-plot the five residue pairs with the lowest total energy.
    4. Classify every contact as main/side chain and scatter-plot them.
    5. Print a rescaled electrostatic term for pairs whose two residues are
       both in a hard-coded residue list.

    Exits with status 2 when ``pdb_path`` is empty or the file cannot be read.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly', action='store_true', dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats', action='store_true', dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel', type=float, action='store', dest='diel',
                        default=1.0, help='Relative dielectric constant')
    parser.add_argument('--vdw', action='store', dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib', action='store', dest='reslib',
                        help='AminoAcid library')
    parser.add_argument('pdb_path')
    args = parser.parse_args()

    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Only the first model is analysed; warn when others are being ignored.
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    st = st[0]

    # Polar atoms to consider, by atom name.
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Contacts within HBLNK between atoms of different residues.
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        if res1 == res2:
            continue
        # Discard covalent bonds and sequence neighbours.
        if (at1 - at2) < COVLNK:
            continue
        if abs(res2.id[1] - res1.id[1]) == 1:
            continue
        if nowats and (res1.get_resname() in waternames
                       or res2.get_resname() in waternames):
            continue
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id,
            hb[0] - hb[1]))

    print()
    print("Residue interactions")
    # Unique residue pairs, in order of first appearance.
    respairs = []
    for hb in hblist:
        candidate = [hb[0].get_parent(), hb[1].get_parent()]
        if candidate not in respairs:
            respairs.append(candidate)

    # Per residue pair: sum Coulomb and Lennard-Jones terms over all atom
    # pairs. `ordered_pairs` is reused by the surface pass further down so
    # each pair can be matched with the record computed here.
    ordered_pairs = sorted(respairs, key=lambda pair: pair[0].id[1])
    l = []
    for res1, res2 in ordered_pairs:
        resid1 = res1.get_resname()
        resid2 = res2.get_resname()
        eint = 0.
        evdw = 0.
        for at1 in res1.get_atoms():
            atparam1 = aaLib.getParams(resid1, at1.id)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in res2.get_atoms():
                atparam2 = aaLib.getParams(resid2, at2.id)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                dist = at1 - at2
                eint += 332.16 * atparam1.charg * atparam2.charg / diel / dist
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw += 4 * eps * ((sig / dist)**12 - (sig / dist)**6)
        print(resid1, res1.id[1], resid2, res2.id[1], eint, evdw, eint + evdw)
        l.append([resid1, res1.id[1], resid2, res2.id[1],
                  eint, evdw, eint + evdw])

    # The five pairs with the lowest total energy.
    print("Five most stable contacts")
    stable = sorted(l, key=lambda rec: rec[6])[:5]
    for element in stable:
        print(element)

    # Grouped bar chart of the three energy components of those pairs. The
    # values come from `stable` (they used to be hard-coded numbers), and each
    # series gets its own x offset so the bars no longer overlap.
    positions = np.arange(len(stable))
    bar_width = 0.15
    opacity = 0.5
    plt.subplots()
    plt.bar(positions, [rec[4] for rec in stable], bar_width,
            alpha=opacity, color='b', label='electrostatic energies')
    plt.bar(positions + bar_width, [rec[5] for rec in stable], bar_width,
            alpha=opacity, color='g', label='van der waals energies')
    plt.bar(positions + 2 * bar_width, [rec[6] for rec in stable], bar_width,
            alpha=opacity, color='r', label='total energies')
    plt.title('Energies for pair of residues')
    plt.xlabel('Contacts')
    plt.ylabel('Energy')
    # Tick under the middle bar of each group, labelled "RES1-RES2".
    plt.xticks(positions + bar_width,
               ['{}-{}'.format(rec[0], rec[2]) for rec in stable])
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Classify each contact by whether its atoms are backbone ("main") or
    # side-chain polar atoms, and collect residue numbers per class.
    contact_codes = {'main:main': 1, 'side:side': 2,
                     'main:side': 3, 'side:main': 4}
    residue_numbers = {label: ([], []) for label in contact_codes}
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        where0 = 'main' if hb[0].id in backbone_polars else 'side'
        where1 = 'main' if hb[1].id in backbone_polars else 'side'
        label = where0 + ':' + where1
        value = contact_codes[label]
        xs, ys = residue_numbers[label]
        xs.append(hb[0].get_parent().id[1])
        ys.append(hb[1].get_parent().id[1])
        print('{:14}{:14}{:14}{:14}{:6.3f}'.format(label, value, hb[0].id,
                                                   hb[1].id, hb[0] - hb[1]))

    plt.figure(figsize=(10, 8))
    for label, colour, legend in (('main:main', 'red', 'both_main'),
                                  ('side:side', 'green', 'both_side'),
                                  ('main:side', 'blue', 'main_side'),
                                  ('side:main', 'yellow', 'side_main')):
        xs, ys = residue_numbers[label]
        plt.scatter(xs, ys, c=colour, label=legend)
    plt.title('Interaction')
    plt.xlabel('Residue1 number')
    plt.ylabel('Residue2 number')
    plt.legend(loc='upper right')
    plt.show()

    # The surface residues are the ones with an Area<5
    # (http://cib.cf.ocha.ac.jp/bitool/ASA/display.php?id=1513152459.2996)
    # NOTE(review): this list is hard-coded, presumably for one specific
    # structure - confirm before running on other PDB files.
    surface_res = {('ILE', 3), ('VAL', 5), ('ILE', 23), ('VAL', 26),
                   ('ILE', 30), ('GLN', 41), ('LEU', 43), ('LEU', 56),
                   ('ILE', 61), ('LEU', 67), ('LEU', 69)}
    for (res1, res2), record in zip(ordered_pairs, l):
        resname1 = res1.get_resname()
        resname2 = res2.get_resname()
        # Only pairs whose two residues are both in the list contribute.
        if ((resname1, res1.id[1]) not in surface_res
                or (resname2, res2.id[1]) not in surface_res):
            continue
        eint = 0.
        for atom1 in res1.get_atoms():
            # Use this loop's own atom/residue; the previous code read stale
            # variables left over from the energy loop above.
            atparam1 = aaLib.getParams(resname1, atom1.id)
            for atom2 in res2.get_atoms():
                atparam2 = aaLib.getParams(resname2, atom2.id)
                # NOTE(review): prefactor 80 kept from the original; the
                # energy loop above uses 332.16 - confirm whether
                # 332.16 / 80 (a dielectric of 80) was intended.
                eint += 80 * atparam1.charg * atparam2.charg / diel / (
                    atom1 - atom2)
        if eint != 0:
            evdw = record[5]  # van der Waals term computed for this pair
            print(resname1, res1.id[1], resname2, res2.id[1],
                  eint, evdw, eint + evdw)
def compute_interactions(pdb_name, save_to_db=False):
    """Compute CA-CA distances and classified residue interactions.

    Loads the stored PDB data of the structure whose protein entry name is
    ``pdb_name`` (lower-cased), keeps the first preferred chain, drops every
    non-water residue that has no generic number in the database, and then:

    * records the CA-CA distance of every pair of non-water residues;
    * finds residue pairs returned by a 6.6 residue-level neighbour search
      that are more than ``NUM_SKIP_RESIDUES`` apart in sequence and keeps
      those for which ``InteractingPair`` reports at least one interaction.

    With ``save_to_db`` the previous interaction pairs of the structure are
    deleted, water-mediated interactions are appended, and interactions and
    distances are written to the database.

    Args:
        pdb_name: Protein entry name of the structure (case-insensitive).
        save_to_db: When true, persist the results.

    Returns:
        ``(classified, distances)``: the list of classified
        ``InteractingPair`` objects and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples.
    """
    do_distances = True
    do_interactions = True

    distances = []
    classified = []

    def pair_key(res_a, res_b):
        # "<chain id><residue number>_<chain id><residue number>"
        return (res_a.get_parent().get_id() + str(res_a.get_id()[1]) + "_" +
                res_b.get_parent().get_id() + str(res_b.get_id()[1]))

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    chain = s[preferred_chain]

    # Database residues that carry a generic number, keyed by sequence number.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Detach residues unknown to the database, but keep waters. The ids are
    # collected first so the chain is not modified while it is iterated.
    ids_to_remove = [
        res.id for res in chain
        if res.id[1] not in dbres and res.get_resname() != "HOH"
    ]
    for res_id in ids_to_remove:
        chain.detach_child(res_id)

    if do_distances:
        # Every unordered pair of non-water residues, each visited once.
        protein_residues = [res for res in chain if not is_water(res)]
        for position, res1 in enumerate(protein_residues):
            for res2 in protein_residues[position + 1:]:
                distance = res1['CA'] - res2['CA']
                distances.append(
                    (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                     dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Keep amino-acid pairs that are MORE than NUM_SKIP_RESIDUES sequence
        # steps apart and contain no water.
        candidate_pairs = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
            and abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
            and not is_water(pair[0]) and not is_water(pair[1])
        ]

        # For each pair of interacting residues, determine the type of
        # interaction
        interactions = [
            InteractingPair(res_a, res_b, dbres[res_a.id[1]],
                            dbres[res_b.id[1]], struc)
            for res_a, res_b in candidate_pairs
        ]

        # Keep only pairs with at least one classified interaction.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # Index the classified pairs so water-mediated interactions can be
            # attached to an existing pair.
            interaction_pairs = {}
            for pair in classified:
                key = pair_key(pair.get_residue_1(), pair.get_residue_2())
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            # Atoms of all water residues in the preferred chain.
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if water_list:
                # (water atom, residue) for each non-water residue found by a
                # 3.5 residue-level search around the water that is an
                # H-bond acceptor or donor.
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # Pair up residues that share the same interacting water.
                for position, (water_one, res_1) in enumerate(water_neighbors):
                    for water_two, res_2 in water_neighbors[position + 1:]:
                        if not water_one == water_two:
                            continue

                        # TODO: order residues + check minimum spacing
                        # between residues
                        key = pair_key(res_1, res_2)

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            res_1, water_one)
                        matches_two = InteractingPair.verify_water_hbond(
                            res_2, water_two)
                        if len(matches_one) == 0 or len(matches_two) == 0:
                            continue

                        # if not exists, create residue pair without
                        # interactions
                        if key not in interaction_pairs:
                            interaction_pairs[key] = InteractingPair(
                                res_1, res_2, dbres[res_1.id[1]],
                                dbres[res_2.id[1]], struc)

                        for a, b in zip(matches_one, matches_two):
                            # HACK: store water ID as part of first atom name
                            interaction_pairs[key].interactions.append(
                                WaterMediated(
                                    a + "|" +
                                    str(water_one.get_parent().get_id()[1]),
                                    b))

            # NOTE(review): only `classified` is saved, so pairs created above
            # purely for water-mediated contacts are never persisted -
            # confirm whether `interaction_pairs.values()` should be saved.
            for p in classified:
                p.save_into_database()

        if do_distances:
            # Insert distances in batches to bound the size of each query.
            bulk_distances = []
            for d in distances:
                bulk_distances.append(
                    Distance(distance=int(100 * d[2]),
                             res1=d[0],
                             res2=d[1],
                             gn1=d[3],
                             gn2=d[4],
                             gns_pair='_'.join([d[3], d[4]]),
                             structure=struc))
                if len(bulk_distances) > 1000:
                    Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            Distance.objects.bulk_create(bulk_distances)

    return classified, distances
def compute_interactions(pdb_name, save_to_db=False):
    """Compute distances, intra-chain and receptor-complex interactions.

    Loads the stored PDB data of the structure whose protein entry name is
    ``pdb_name`` (lower-cased), keeps the first preferred chain, drops every
    non-water residue of it that has no generic number in the database, and
    then:

    * records the CA-CA distance of every pair of non-water residues;
    * finds residue pairs returned by a 6.6 residue-level neighbour search
      that are more than ``NUM_SKIP_RESIDUES`` apart in sequence and keeps
      those for which ``InteractingPair`` reports at least one interaction;
    * when a ``SignprotComplex`` exists for the structure, classifies
      interactions between preferred-chain residues and residues of the
      complex's ``alpha`` chain found by a 4.5 residue-level search.

    With ``save_to_db`` the previous interaction pairs of the structure are
    deleted, water-mediated interactions are appended, and interactions,
    complex interactions and distances are written to the database.

    Args:
        pdb_name: Protein entry name of the structure (case-insensitive).
        save_to_db: When true, persist the results.

    Returns:
        ``(classified, distances)``: the list of classified intra-chain
        ``InteractingPair`` objects and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples. Complex
        interactions are saved but not returned.
    """
    do_distances = True
    do_interactions = True
    do_complexes = True

    distances = []
    classified = []
    classified_complex = []

    def pair_key(res_a, res_b):
        # "<chain id><residue number>_<chain id><residue number>"
        return (res_a.get_parent().get_id() + str(res_a.get_id()[1]) + "_" +
                res_b.get_parent().get_id() + str(res_b.get_id()[1]))

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    chain = s[preferred_chain]

    # Database residues that carry a generic number, keyed by sequence number.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Detach residues unknown to the database, but keep waters. The ids are
    # collected first so the chain is not modified while it is iterated.
    ids_to_remove = [
        res.id for res in chain
        if res.id[1] not in dbres and res.get_resname() != "HOH"
    ]
    for res_id in ids_to_remove:
        chain.detach_child(res_id)

    if do_distances:
        # Every unordered pair of non-water residues, each visited once.
        protein_residues = [res for res in chain if not is_water(res)]
        for position, res1 in enumerate(protein_residues):
            for res2 in protein_residues[position + 1:]:
                distance = res1['CA'] - res2['CA']
                distances.append(
                    (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                     dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Keep amino-acid pairs that are more than NUM_SKIP_RESIDUES sequence
        # steps apart and contain no water.
        candidate_pairs = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
            and abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
            and not is_water(pair[0]) and not is_water(pair[1])
        ]

        # For each pair of interacting residues, determine the type of
        # interaction
        interactions = [
            InteractingPair(res_a, res_b, dbres[res_a.id[1]],
                            dbres[res_b.id[1]], struc)
            for res_a, res_b in candidate_pairs
        ]

        # Keep only pairs with at least one classified interaction.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if do_complexes:
        try:
            # check if structure in signprot_complex
            signprot_complex = SignprotComplex.objects.get(structure=struc)

            # Get all GPCR residue atoms based on preferred chain
            gpcr_atom_list = [
                atom
                for residue in Selection.unfold_entities(s[preferred_chain],
                                                         'R')
                if is_aa(residue)
                for atom in residue.get_atoms()
            ]

            # Get all residue atoms from the coupled protein (e.g. G-protein),
            # selecting the chain named by the complex's `alpha` field.
            sign_atom_list = [
                atom
                for residue in Selection.unfold_entities(
                    s[signprot_complex.alpha], 'R')
                if is_aa(residue)
                for atom in residue.get_atoms()
            ]
            ns_sign = NeighborSearch(sign_atom_list)

            # For each GPCR atom perform the neighbor search on the signaling
            # protein; the set removes duplicate residue pairs.
            complex_neighbors = {
                (gpcr_atom.parent, match_res)
                for gpcr_atom in gpcr_atom_list
                for match_res in ns_sign.search(gpcr_atom.coord, 4.5, "R")
            }

            # Database residues of the signaling protein with generic number.
            residues_sign = ProteinConformation.objects.get(
                protein__entry_name=pdb_name + "_" +
                signprot_complex.alpha.lower()).residue_set.exclude(
                    generic_number=None).all().prefetch_related(
                        'generic_number')
            dbres_sign = {r.sequence_number: r for r in residues_sign}

            # Find interactions between residues known on both sides.
            interactions = [
                InteractingPair(gpcr_res, sign_res, dbres[gpcr_res.id[1]],
                                dbres_sign[sign_res.id[1]], struc)
                for gpcr_res, sign_res in complex_neighbors
                if gpcr_res.id[1] in dbres and sign_res.id[1] in dbres_sign
            ]

            # Filter unclassified interactions
            classified_complex = [
                interaction for interaction in interactions
                if len(interaction.get_interactions()) > 0
            ]

            # TODO: water-mediated interactions across the complex are not
            # implemented. Earlier drafts computed, for every water atom in
            # the structure, the GPCR and signaling-protein residues found by
            # a 3.5 residue-level search, but never used the result; that dead
            # work was removed. Open points carried over from the draft:
            #   1. update complexes to include also mini Gs and peptides
            #      (e.g. 4X1H/6FUF/5G53);
            #   2. intersect hits sharing the same water, keep pairs where
            #      both water-residue contacts are polar, and append a
            #      WaterMediated interaction to the residue pair;
            #   3. handle several waters per pair and decide whether
            #      sidechain/backbone-mediated contacts should be split;
            #   4. fix assignment of interacting atom labels.

        except SignprotComplex.DoesNotExist:
            # No complex definition for this structure: nothing to classify.
            pass
        except ProteinConformation.DoesNotExist:
            print(
                "No protein conformation definition found for signaling protein of ",
                pdb_name)

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # Index the classified pairs so water-mediated interactions can be
            # attached to an existing pair.
            interaction_pairs = {}
            for pair in classified:
                key = pair_key(pair.get_residue_1(), pair.get_residue_2())
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            # Atoms of all water residues in the preferred chain.
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if water_list:
                # (water atom, residue) for each non-water residue found by a
                # 3.5 residue-level search around the water that is an
                # H-bond acceptor or donor.
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # Pair up residues that share the same interacting water.
                for position, (water_one, res_1) in enumerate(water_neighbors):
                    for water_two, res_2 in water_neighbors[position + 1:]:
                        if not water_one == water_two:
                            continue

                        # TODO: order residues + check minimum spacing
                        # between residues
                        key = pair_key(res_1, res_2)

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            res_1, water_one)
                        matches_two = InteractingPair.verify_water_hbond(
                            res_2, water_two)
                        if len(matches_one) == 0 or len(matches_two) == 0:
                            continue

                        # if not exists, create residue pair without
                        # interactions
                        if key not in interaction_pairs:
                            interaction_pairs[key] = InteractingPair(
                                res_1, res_2, dbres[res_1.id[1]],
                                dbres[res_2.id[1]], struc)

                        for a, b in zip(matches_one, matches_two):
                            # HACK: store water ID as part of first atom name
                            interaction_pairs[key].interactions.append(
                                WaterMediated(
                                    a + "|" +
                                    str(water_one.get_parent().get_id()[1]),
                                    b))

            # NOTE(review): only `classified` is saved, so pairs created above
            # purely for water-mediated contacts are never persisted -
            # confirm whether `interaction_pairs.values()` should be saved.
            for p in classified:
                p.save_into_database()

        if do_complexes:
            for pair in classified_complex:
                pair.save_into_database()

        if do_distances:
            # Insert distances in batches to bound the size of each query.
            bulk_distances = []
            for d in distances:
                bulk_distances.append(
                    Distance(distance=int(100 * d[2]),
                             res1=d[0],
                             res2=d[1],
                             gn1=d[3],
                             gn2=d[4],
                             gns_pair='_'.join([d[3], d[4]]),
                             structure=struc))
                if len(bulk_distances) > 1000:
                    Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            Distance.objects.bulk_create(bulk_distances)

    return classified, distances
def main():
    """Detect polar contacts in a PDB file and rank residue-pair energies.

    Parses the command line (``pdb_path`` plus ``--backonly``, ``--nowats``,
    ``--diel``, ``--vdw`` and ``--rlib``), lists the polar atom pairs returned
    by ``search_all(HBLNK)`` after filtering, prints electrostatic, van der
    Waals and total energy for each residue pair involved, and finally prints
    the five records with the lowest total energy.

    Exits with status 2 when ``pdb_path`` is empty or the file cannot be read.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly', action='store_true', dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats', action='store_true', dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel', type=float, action='store', dest='diel',
                        default=1.0, help='Relative dielectric constant')
    parser.add_argument('--vdw', action='store', dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib', action='store', dest='reslib',
                        help='AminoAcid library')
    parser.add_argument('pdb_path')
    args = parser.parse_args()

    # Echo the effective settings.
    print("Settings")
    print("--------")
    for option, setting in vars(args).items():
        print('{:10}:'.format(option), setting)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    diel = args.diel

    # Van der Waals parameter set.
    vdwParams = VdwParamset(args.vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Residue library.
    aaLib = ResiduesDataLib(args.reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Warn about extra models; model 0 is used regardless.
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    st = st[0]

    # Polar atoms, selected by atom name.
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [atom for atom in st.get_atoms() if atom.id in selected_atoms]

    # Contacts within HBLNK between atoms of different residues.
    nbsearch = NeighborSearch(polats)
    hblist = []
    for atom_a, atom_b in nbsearch.search_all(HBLNK):
        res_a = atom_a.get_parent()
        res_b = atom_b.get_parent()
        if res_a == res_b:
            continue
        # Discard covalent bonds and sequence neighbours.
        if (atom_a - atom_b) < COVLNK:
            continue
        if abs(res_b.id[1] - res_a.id[1]) == 1:
            continue
        # Optionally drop contacts involving water.
        if nowats and (res_a.get_resname() in waternames
                       or res_b.get_resname() in waternames):
            continue
        # Lower serial number goes first.
        in_order = atom_a.get_serial_number() < atom_b.get_serial_number()
        hblist.append([atom_a, atom_b] if in_order else [atom_b, atom_a])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    for first, second in sorted(hblist,
                                key=lambda hb: hb[0].get_serial_number()):
        r1 = first.get_parent()
        r2 = second.get_parent()
        left = r1.get_resname() + ' ' + str(r1.id[1]) + first.id
        right = r2.get_resname() + ' ' + str(r2.id[1]) + second.id
        print('{:14} {:14} {:6.3f} '.format(left, right, first - second))

    print()
    print("Residue interactions")

    # Unique residue pairs, in order of first appearance.
    respairs = []
    for first, second in hblist:
        candidate = [first.get_parent(), second.get_parent()]
        if candidate not in respairs:
            respairs.append(candidate)

    # For each residue pair, accumulate both energy terms over all atom pairs.
    list5 = []
    for rpair in sorted(respairs, key=lambda pair: pair[0].id[1]):
        res_one, res_two = rpair[0], rpair[1]
        resid1 = res_one.get_resname()
        resid2 = res_two.get_resname()
        eint = 0.
        evdw = 0.
        for at1 in res_one.get_atoms():
            atparam1 = aaLib.getParams(resid1, at1.id)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in res_two.get_atoms():
                atparam2 = aaLib.getParams(resid2, at2.id)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                # Coulomb term
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / (
                    at1 - at2)
                # Lennard-Jones term with geometric-mean combination
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw = evdw + 4 * eps * ((sig / (at1 - at2))**12 -
                                         (sig / (at1 - at2))**6)
        # Record and print: electrostatic, van der Waals, total.
        record = (resid1, res_one.id[1], resid2, res_two.id[1], eint, evdw,
                  eint + evdw)
        list5.append(record)
        print(*record)

    # Print the five records with the lowest total energy.
    for element in sorted(list5, key=lambda li: li[6])[:5]:
        print(element)
# Echo every parsed command-line setting.
for k, v in vars(args).items():
    print('{:10}:'.format(k), v)

print("PDB.filename:", args.pdb_file.name)

parser = PDBParser(PERMISSIVE=1)
print('Parsing', args.pdb_file)

# Load the structure using the name of the PDB file handle.
st = parser.get_structure('STR', args.pdb_file.name)

# Collect the atoms whose id is 'CA'.
ca_atoms = [at for at in st.get_atoms() if at.id == 'CA']
print(len(ca_atoms), 'CA Atoms found')

# Build the neighbour index and fetch all pairs for args.max_dist.
nbsearch = NeighborSearch(ca_atoms)
at_pairs = nbsearch.search_all(args.max_dist)


def _first_serial(at_pair):
    # Sort key: serial number of the first atom of the pair.
    return at_pair[0].serial_number


# Print the pairs ordered by the first atom's serial number.
# Redirect stdout (e.g. `> output_list`) to keep the listing.
for at1, at2 in sorted(at_pairs, key=_first_serial):
    print(atom_id(at1), ":", atom_id(at2), at1 - at2)
return res_list, np.concatenate(coo_list).reshape((-1, 3)) # %% p, c = "4fae", "B" parser = PDBParser() if PreProcess.download_pdb(p, f'{p}.pdb'): structure = parser.get_structure('a', f'{p}.pdb') chain = structure[0][c] res_list, coo_list = get_residue_feature(chain) atom_list = [i for i in chain.get_atoms()] ns = NeighborSearch(atom_list) ns_list = ns.search_all(3.3, level='R') edge, l = [], len(res_list) for ai, aj in ns_list: i, j = ai.get_id()[1]-1, aj.get_id()[1]-1 if i < j-1 and j < l: edge.append([i, j, 1]) #%% mod = 'train' out = {'train': {}, 'test':{}, 'valid':{}} out[mod]['edge'] =
def _residue_label(residue):
    """Return the 'RESNAME NUMBER' label used to identify a residue in reports."""
    return str(residue.get_resname()) + ' ' + str(residue.id[1])


def _find_polar_contacts(model, selected_atoms, nowats):
    """Return [atom, atom] pairs of polar atoms in contact.

    Atoms whose id is in ``selected_atoms`` are searched for neighbours
    within HBLNK. Pairs are discarded when both atoms belong to the same
    residue, when they are closer than COVLNK, when their residue numbers
    differ by exactly one, or (if ``nowats``) when either residue name is in
    ``waternames``. Each pair is stored with the lower serial number first.
    """
    polats = [at for at in model.get_atoms() if at.id in selected_atoms]
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        if res1 == res2:
            continue
        # Discard covalents and sequence neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(res2.id[1] - res1.id[1]) == 1:
            continue
        # Remove waters on request
        if nowats and (res1.get_resname() in waternames
                       or res2.get_resname() in waternames):
            continue
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])
    return hblist


def _interaction_energies(res1, res2, aaLib, vdwParams, diel):
    """Return (electrostatic, van der Waals) sums over all atom pairs of two residues."""
    name1 = res1.get_resname()
    name2 = res2.get_resname()
    eint = 0.
    evdw = 0.
    for at1 in res1.get_atoms():
        atparam1 = aaLib.getParams(name1, at1.id)
        vdwprm1 = vdwParams.atTypes[atparam1.atType]
        for at2 in res2.get_atoms():
            atparam2 = aaLib.getParams(name2, at2.id)
            vdwprm2 = vdwParams.atTypes[atparam2.atType]
            dist = at1 - at2  # subtracting two atoms yields their distance
            eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / dist
            eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
            sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
            evdw = evdw + 4 * eps * ((sig / dist)**12 - (sig / dist)**6)
    return eint, evdw


def _surface_electrostatics(res1, res2, aaLib, diel):
    """Return the charge-charge sum used by Exercise B.2 for two residues."""
    name1 = res1.get_resname()
    name2 = res2.get_resname()
    eint = 0.
    for at1 in res1.get_atoms():
        atparam1 = aaLib.getParams(name1, at1.id)
        for at2 in res2.get_atoms():
            atparam2 = aaLib.getParams(name2, at2.id)
            # NOTE(review): the prefactor here is 80, whereas Exercise A uses
            # 332.16 for the same expression -- confirm this is intended.
            eint = eint + 80 * atparam1.charg * atparam2.charg / diel / (
                at1 - at2)
    return eint


def main():
    """Detect polar contacts in a PDB file and print the exercise reports.

    Parses the command line, loads the VDW parameter set and the residue
    library, reads the first model of the structure and then prints:

    * the list of polar contacts (atom pairs and distances),
    * Exercise A: the five residue pairs with the lowest total
      (electrostatic + VDW) interaction energy,
    * Exercise B.1: the contacts classified as main/side chain, plus a plot
      of which residues interact with main-chain or side-chain atoms,
    * Exercise B.2: electrostatic sums for contacts where both residues are
      in the hard-coded surface list.

    Exits with status 2 if no PDB path is given or the file cannot be read.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')
    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')
    parser.add_argument('pdb_path')

    args = parser.parse_args()

    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    pdb_path = args.pdb_path
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(args.vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(args.reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    pdb_parser = PDBParser(PERMISSIVE=1)
    try:
        st = pdb_parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    # Using Model 0 any way
    st = st[0]

    # Searching for contacts under HBLNK on different residues
    selected_atoms = backbone_polars if args.backonly else all_polars
    hblist = _find_polar_contacts(st, selected_atoms, args.nowats)
    # hblist is not modified afterwards, so the sorted view is computed once
    # and shared by the contact report and Exercise B.1.
    sorted_hbs = sorted(hblist, key=lambda hb: hb[0].get_serial_number())

    print()
    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    for at1, at2 in sorted_hbs:
        print('{:14} {:14} {:6.3f} '.format(
            _residue_label(at1.get_parent()) + at1.id,
            _residue_label(at2.get_parent()) + at2.id,
            at1 - at2))
    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for at1, at2 in hblist:
        pair = [at1.get_parent(), at2.get_parent()]
        if pair not in respairs:
            respairs.append(pair)
    sorted_respairs = sorted(respairs, key=lambda pair: pair[0].id[1])

    # Exercise A: five most favourable residue-residue interactions
    print('Exercise A')
    pair_energies = []
    for res1, res2 in sorted_respairs:
        eint, evdw = _interaction_energies(res1, res2, aaLib, vdwParams, diel)
        pair_energies.append([
            res1.get_resname(), res1.id[1],
            res2.get_resname(), res2.id[1],
            eint, evdw, eint + evdw
        ])
    for record in sorted(pair_energies, key=lambda rec: rec[6])[:5]:
        print(record)

    # Exercise B.1: classify every contact as main/side chain
    print('Exercise B.1')
    labels = ('main-main', 'main-side', 'side-main', 'side-side')
    contacts = {label: [] for label in labels}
    # A residue is listed under the kind of atom it contacts on its partner.
    partners = {'main': [], 'side': []}
    for at1, at2 in sorted_hbs:
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        kind1 = 'main' if at1.id in backbone_polars else 'side'
        kind2 = 'main' if at2.id in backbone_polars else 'side'
        label = kind1 + '-' + kind2
        contacts[label].append([
            res1.get_resname(), res1.id[1],
            res2.get_resname(), res2.id[1],
            label, at1.id, at2.id, at1 - at2
        ])
        for res_key, partner_kind in ((_residue_label(res1), kind2),
                                      (_residue_label(res2), kind1)):
            if res_key not in partners[partner_kind]:
                partners[partner_kind].append(res_key)

    for label in labels:
        for record in contacts[label]:
            print(record)

    to_main = partners['main']
    to_side = partners['side']
    res = to_main + to_side
    x = np.array(list(range(len(res))))
    y = np.array(['to_main'] * len(to_main) + ['to_side'] * len(to_side))
    plt.xticks(x, res)
    plt.plot(x, y, 'ro')
    plt.show()
    # The plot shows, for each residue, whether it interacts with main-chain
    # and/or side-chain atoms of its partners.
    # End of exercise B 1

    print()
    print('Exercise B', 2)
    # From http://cib.cf.ocha.ac.jp/bitool/ASA/ the residues in the surface are:
    surface = {
        ('ILE', 3), ('VAL', 5), ('ILE', 23), ('VAL', 26), ('ILE', 30),
        ('GLN', 41), ('LEU', 43), ('LEU', 56), ('ILE', 61), ('LEU', 67),
        ('LEU', 69)
    }
    surface_energies = []
    for res1, res2 in sorted_respairs:
        key1 = (res1.get_resname(), res1.id[1])
        key2 = (res2.get_resname(), res2.id[1])
        # Surface membership is a per-residue property, so it is tested once
        # per pair instead of once per atom pair.
        if key1 not in surface or key2 not in surface:
            continue
        eint = _surface_electrostatics(res1, res2, aaLib, diel)
        if eint != 0:
            surface_energies.append([key1[0], key1[1], key2[0], key2[1], eint])
    for record in surface_energies:
        print(record)