Пример #1
0
def get_interface(st, dist):
    """Collect, for every chain, the residues that touch another chain.

    A residue is reported when one of its non-hydrogen atoms lies within
    ``dist`` of a non-hydrogen atom whose residue belongs to a different
    chain. The result is a dict keyed by the chain ids of the first model
    (``st[0]``); each value is a set of residue objects.
    """
    # Hydrogens are dropped to keep the neighbour search small.
    heavy_atoms = [atom for atom in st.get_atoms() if atom.element != 'H']
    searcher = NeighborSearch(heavy_atoms)

    # Order is irrelevant here, so every chain gets a set.
    interface = {chain.id: set() for chain in st[0]}

    for atom_a, atom_b in searcher.search_all(dist):
        residue_a, residue_b = atom_a.get_parent(), atom_b.get_parent()
        chain_a, chain_b = residue_a.get_parent(), residue_b.get_parent()
        # Only contacts that cross a chain boundary count.
        if chain_a != chain_b:
            interface[chain_a.id].add(residue_a)
            interface[chain_b.id].add(residue_b)
    return interface
Пример #2
0
def getLigandNbrs(resids: List[Residue],
                  struct: Structure) -> List[ResidueDict]:
    """Find the residues surrounding a ligand.

    For every residue in ``resids`` (the residues that constitute the
    ligand) a KD-tree search with radius 5 is run, at residue level, around
    the residue's first atom (``child_list[0]``). Residues that belong to
    the ligand itself are discarded, the rest is de-duplicated through
    ``set`` and each remaining item is passed through ``addBanClass``.

    Returns the list of ``addBanClass`` results; because of the ``set``
    step the order is arbitrary.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # The probe point is the residue's first atom (not a random one).
        probe_atom = r.child_list[0]
        # level='R' makes the search return whole residues.
        nbrs.extend(ns.search(probe_atom.get_coord(), 5, level='R'))

    # Describe the ligand's own residues once, instead of rebuilding them
    # for every (neighbor, constituent) comparison.
    ligand_dicts = [ResidueDict(constit) for constit in resids]

    # Filter out the residues that constitute the ligand itself.
    filtered = []
    for neighbor in nbrs:
        neighbor_dict = ResidueDict(neighbor)
        if not any(own == neighbor_dict for own in ligand_dicts):
            filtered.append(neighbor_dict)

    return [addBanClass(x) for x in set(filtered)]
Пример #3
0
def getInterface(struct, useCA=True, res2res_dist=8):
    """List, per chain of the first model, the residues near another chain.

    ``useCA`` selects which atoms take part in the search: atoms whose name
    starts with "CA" when true (note this prefix test also accepts any other
    atom name beginning with "CA"), otherwise every atom whose name does not
    start with "H". Residue pairs closer than ``res2res_dist`` are then
    inspected and, when the two residues sit on different chains, both
    residue ids are recorded.

    Returns ``{chain_id: {"bindingSite": [residue_id, ...]}}``. A residue id
    is appended once per qualifying pair, so it may occur more than once.
    """
    model = struct[0]

    if useCA:
        def keep(atom):
            return atom.name.startswith("CA")
    else:
        def keep(atom):
            return not atom.name.startswith("H")

    atomList = [atom for atom in model.get_atoms() if keep(atom)]
    chains = model.child_list
    residuePairs = NeighborSearch(atomList).search_all(res2res_dist, level="R")

    residuesBindingSitePerChain = {}
    for chain in chains:
        residuesBindingSitePerChain[chain.get_id()] = {"bindingSite": []}

    for resA, resB in residuePairs:
        # Full id layout: (structure id, model id, chain id, residue id).
        _, _, chainA, _ = resA.get_full_id()
        _, _, chainB, _ = resB.get_full_id()
        if chainA != chainB:
            residuesBindingSitePerChain[chainA]["bindingSite"].append(resA.get_id())
            residuesBindingSitePerChain[chainB]["bindingSite"].append(resB.get_id())

    return residuesBindingSitePerChain
Пример #4
0
def compute_interactions(pdb_name):
    """Classify residue-residue interactions within one chain of a PDB entry.

    The entry name is lower-cased, its database record is looked up, and the
    Biopython structure is loaded through ``pdb_get_structure``. Residue
    pairs within 4.5 of each other are searched among the atoms of the
    first chain yielded by ``get_chains()``. Pairs are kept only when both
    residues pass ``is_aa`` and their sequence numbers differ by more than
    ``NUM_SKIP_RESIDUES``.

    Returns the ``InteractingPair`` objects that carry at least one
    interaction.
    """
    pdb_name = pdb_name.lower()

    # Database record of the entry (raises if it does not exist).
    db_structure = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)

    # NOTE(review): the preferred chain is read but not used below; the
    # search runs on the first chain of the parsed structure instead.
    preferred_chain = db_structure.preferred_chain.split(',')[0]

    # Biopython structure for the same entry.
    structure = pdb_get_structure(pdb_name)
    first_chain = next(structure.get_chains())
    atom_list = Selection.unfold_entities(first_chain, 'A')

    # All residue pairs with atoms closer than 4.5.
    residue_pairs = NeighborSearch(atom_list).search_all(4.5, "R")

    # Keep amino-acid pairs that are MORE than NUM_SKIP_RESIDUES apart in
    # sequence numbering.
    candidates = []
    for res_a, res_b in residue_pairs:
        if not (is_aa(res_a) and is_aa(res_b)):
            continue
        if abs(res_a.id[1] - res_b.id[1]) > NUM_SKIP_RESIDUES:
            candidates.append((res_a, res_b))

    # Determine the interaction types of every remaining pair.
    interactions = [
        InteractingPair(res_a, res_b, get_interactions(res_a, res_b))
        for res_a, res_b in candidates
    ]

    # Only pairs with at least one classified interaction are returned.
    return [pair for pair in interactions if len(pair.get_interactions()) > 0]
Пример #5
0
    def getPairsOfResiduesInContact(self, structureL, structureR):
        """Find which ligand residues are in contact with which receptor residues.

        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts: set of (full residue id from structureL,
                                  full residue id from structureR) tuples
        @return chainsNotContactL: set of structureL chain ids without any contact
        @return chainsNotContactR: set of structureR chain ids without any contact
        @raise NoValidPDBFile: when gathering the atoms of either structure
                               raises IndexError
        """
        # Non-hydrogen atoms of the first model of each structure, ligand first.
        heavyAtoms = []
        for structure, errorMsg in ((structureL, "Problems parsing pdbFile 1"),
                                    (structureR, "Problems parsing pdbFile 2")):
            try:
                heavyAtoms.extend(
                    atom for atom in structure.child_list[0].get_atoms()
                    if not atom.name.startswith("H"))
            except IndexError:
                raise NoValidPDBFile(errorMsg)

        residuePairs = NeighborSearch(heavyAtoms).search_all(self.res2res_dist,
                                                             level="R")
        ligandId = structureL.get_id()
        receptorId = structureR.get_id()

        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for resA, resB in residuePairs:
            fullIdA = resA.get_full_id()
            fullIdB = resB.get_full_id()
            # Full id layout: (structure id, model id, chain id, residue id).
            structA, _, _, _ = fullIdA
            structB, _, _, _ = fullIdB
            # Work out which residue of the pair is the ligand one; pairs
            # within a single structure are ignored.
            if structA == ligandId and structB == receptorId:
                ligandFullId, receptorFullId = fullIdA, fullIdB
            elif structA == receptorId and structB == ligandId:
                ligandFullId, receptorFullId = fullIdB, fullIdA
            else:
                continue
            positiveContacts.add((ligandFullId, receptorFullId))
            chainsInContactL.add(ligandFullId[2])
            chainsInContactR.add(receptorFullId[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
                structureL, structureR, positiveContacts, chainsInContactL,
                chainsInContactR)

        allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
        allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
        chainsNotContactL = allChainsL - chainsInContactL
        chainsNotContactR = allChainsR - chainsInContactR
        return positiveContacts, chainsNotContactL, chainsNotContactR
Пример #6
0
    def getPairsOfResiduesInContact(self, structureL, structureR):
        """Compute which ligand residues are in contact with which receptor residues.

        :param structureL: Bio.PDB.Structure. Structure of ligand unbound state if available
        :param structureR: Bio.PDB.Structure. Structure of receptor unbound state if available.
        :return positiveContacts, chainsNotContactL, chainsNotContactR

                positiveContacts:  set of (ligand residue, receptor residue)
                                   pairs, stored as residue objects
                chainsNotContactL: set of ligand chain ids that are not in contact
                chainsNotContactR: set of receptor chain ids that are not in contact
        :raises NoValidPDBFile: when collecting the atoms of either structure
                                raises IndexError
        """
        def heavyAtomsOf(structure, errorMsg):
            # Non-hydrogen atoms of the structure's first model.
            try:
                return [
                    atom for atom in structure.child_list[0].get_atoms()
                    if not atom.name.startswith("H")
                ]
            except IndexError:
                raise NoValidPDBFile(errorMsg)

        ligandAtoms = heavyAtomsOf(structureL, "Problems parsing pdbFile 1")
        receptorAtoms = heavyAtomsOf(structureR, "Problems parsing pdbFile 2")

        kdSearch = NeighborSearch(ligandAtoms + receptorAtoms)
        residuePairs = kdSearch.search_all(self.res2res_dist, level="R")
        ligandId = structureL.get_id()
        receptorId = structureR.get_id()

        positiveContactsResidues = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for first, second in residuePairs:
            # Full id layout: (structure id, model id, chain id, residue id).
            ownerFirst, _, chainFirst, _ = first.get_full_id()
            ownerSecond, _, chainSecond, _ = second.get_full_id()
            if ownerFirst == ligandId and ownerSecond == receptorId:
                ligandSide, receptorSide = (first, chainFirst), (second, chainSecond)
            elif ownerFirst == receptorId and ownerSecond == ligandId:
                ligandSide, receptorSide = (second, chainSecond), (first, chainFirst)
            else:
                # Both residues come from the same structure: not a
                # ligand-receptor contact.
                continue
            positiveContactsResidues.add((ligandSide[0], receptorSide[0]))
            chainsInContactL.add(ligandSide[1])
            chainsInContactR.add(receptorSide[1])

        allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
        allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
        chainsNotContactL = allChainsL - chainsInContactL
        chainsNotContactR = allChainsR - chainsInContactR
        return positiveContactsResidues, chainsNotContactL, chainsNotContactR
Пример #7
0
 def test_neighbor_search(self):
     """NeighborSearch: Find nearby randomly generated coordinates.

     Based on the self test in Bio.PDB.NeighborSearch.
     """
     class RandomAtom:
         # Minimal stand-in for an atom: NeighborSearch only needs get_coord().
         def __init__(self):
             self.coord = 100 * random(3)

         def get_coord(self):
             return self.coord

     for _ in range(20):
         atoms = [RandomAtom() for _ in range(100)]
         ns = NeighborSearch(atoms)
         hits = ns.search_all(5.0)
         # search_all returns a list of pairs. Comparing that list with an
         # int raises TypeError on Python 3, and assert_ is a removed alias,
         # so check the type and the length explicitly instead.
         self.assertIsInstance(hits, list)
         self.assertGreaterEqual(len(hits), 0)
Пример #8
0
    def consumeMoleDataframe(self, radius: float):
        """
        Register every residue found near the points of ``self.mole_dataframe``.

        The dataframe must provide X, Y and Z columns (other columns may be
        present); it is aimed at MOLE's merged csv results, where the points
        are assumed to follow the centerline of the tunnel. For each row a
        residue-level neighbor search of the given radius is run around the
        point and every hit is handed to ``self.addResidue``. ``self.radius``
        is set to ``radius`` and ``self.rescount`` grows by one per row.
        """
        allAtoms = list(self.structure.get_atoms())
        searcher = NeighborSearch(allAtoms, bucket_size=3)
        self.radius = radius

        def collectAround(row):
            # Every processed row is counted, whether or not it has hits.
            self.rescount += 1
            center = numpy.array([row['X'], row['Y'], row['Z']])
            for residue in searcher.search(center, radius, level='R'):
                self.addResidue(residue)

        # axis=1 makes apply() call the helper once per row.
        self.mole_dataframe.apply(collectAround, axis=1)
Пример #9
0
def quick_neighbor_search_test():
    """Smoke-test NeighborSearch on randomly placed points.

    Runs 20 rounds; each builds 100 atom stand-ins with random coordinates,
    searches all pairs within 5.0 and checks that a list comes back. Prints
    "Done" at the end.
    """
    # Based on the self test in Bio.PDB.NeighborSearch
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        # NeighborSearch only needs get_coord() from its items.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = [Atom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        # ``hits >= 0`` compared a list with an int, which raises TypeError
        # on Python 3; check the returned type instead.
        assert isinstance(hits, list)
    # The function-call form of print works on Python 2 and Python 3.
    print("Done")
Пример #10
0
def quick_neighbor_search_test():
    """Quick sanity check of NeighborSearch with random coordinates.

    Twenty times over, 100 atom stand-ins with random coordinates are
    indexed and all pairs within 5.0 are requested; the result must be a
    list. "Done" is printed once every round has passed.
    """
    # Based on the self test in Bio.PDB.NeighborSearch
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        # Only get_coord() is required by NeighborSearch.
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        al = [Atom() for _ in range(100)]
        hits = NeighborSearch(al).search_all(5.0)
        # A list cannot be ordered against an int on Python 3, so the old
        # ``hits >= 0`` check is replaced by a type check.
        assert isinstance(hits, list)
    # print() with a single argument is valid on both Python 2 and 3.
    print("Done")
Пример #11
0
    def test_neighbor_search(self):
        """NeighborSearch: Find nearby randomly generated coordinates.

        Based on the self test in Bio.PDB.NeighborSearch.
        """
        class RandomAtom:
            # Bare-bones atom: a random coordinate plus its accessor.
            def __init__(self):
                self.coord = 100 * random(3)

            def get_coord(self):
                return self.coord

        rounds, atoms_per_round = 20, 100
        for _ in range(rounds):
            cloud = [RandomAtom() for _ in range(atoms_per_round)]
            hits = NeighborSearch(cloud).search_all(5.0)
            # The hits themselves are passed as the failure message.
            self.assertTrue(isinstance(hits, list), hits)
            self.assertTrue(len(hits) >= 0, hits)
Пример #12
0
def getLigandNbrs(resids: List[Residue], struct:Structure):
    """Find the residues surrounding a ligand.

    For every residue in ``resids`` (the residues that make up the ligand)
    a residue-level search of radius 5 is run around the residue's first
    atom (``child_list[0]``). Hits that are part of the ligand itself are
    dropped, the remainder is de-duplicated through ``set`` and each item is
    passed through ``addBanClass``.

    Returns the list of ``addBanClass`` results, in arbitrary (set) order.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # The probe point is the residue's first atom.
        probe_atom = r.child_list[0]
        # level='R' already returns residues. Unpacking a hit with ``*``
        # would iterate over the residue and collect its children instead
        # of the residue itself, so the hits are added as they are.
        nbrs.extend(ns.search(probe_atom.get_coord(), 5, level='R'))

    # Identify the ligand's own residues once rather than once per
    # comparison.
    ligand_ids = [ResidueId(constit) for constit in resids]

    filtered = []
    for neighbor in nbrs:
        neighbor_id = ResidueId(neighbor)
        if not any(own == neighbor_id for own in ligand_ids):
            filtered.append(neighbor_id)

    return [addBanClass(x) for x in set(filtered)]
Пример #13
0
    def test_neighbor_search(self):
        """NeighborSearch: Find nearby randomly generated coordinates.

        Based on the self test in Bio.PDB.NeighborSearch.
        """

        # This comment stops black style adding a blank line here, which causes flake8 D202.
        class RandomAtom:
            # Bare-bones atom: a random coordinate plus its accessor.
            def __init__(self):
                self.coord = 100 * random(3)

            def get_coord(self):
                return self.coord

        for _ in range(20):
            atoms = [RandomAtom() for _ in range(100)]
            ns = NeighborSearch(atoms)
            hits = ns.search_all(5.0)
            self.assertTrue(isinstance(hits, list), hits)
            self.assertTrue(len(hits) >= 0, hits)

        # A point far away from our random atoms must have no neighbours at
        # any level; the searcher from the last round is reused.
        far_away = array([250, 250, 250])
        for level in ("A", "R", "C", "M", "S"):
            self.assertEqual([], ns.search(far_away, 5.0, level))
Пример #14
0
 def test_neighbor_search(self):
     """NeighborSearch: Find nearby randomly generated coordinates.

     Based on the self test in Bio.PDB.NeighborSearch.
     """
     class RandomAtom:
         # Bare-bones atom: a random coordinate plus its accessor.
         def __init__(self):
             self.coord = 100 * random(3)

         def get_coord(self):
             return self.coord

     for _ in range(20):
         atoms = [RandomAtom() for _ in range(100)]
         ns = NeighborSearch(atoms)
         hits = ns.search_all(5.0)
         self.assertTrue(isinstance(hits, list), hits)
         self.assertTrue(len(hits) >= 0, hits)

     # Far away from our random atoms: no level may report a neighbour.
     # The searcher built in the last round is reused.
     far_away = array([250, 250, 250])
     for level in ("A", "R", "C", "M", "S"):
         self.assertEqual([], ns.search(far_away, 5.0, level))
Пример #15
0
def getResidueNeighbors(struct:Structure, resid: ResidueFullIdDict, radius:float, level:str = 'R', all_levels=False)-> List[Residue] or List[Chain] or List[Chain or Residue]:
    """
    Yield the neighbours of one residue.

    struct: opened cif structure
    resid: object carrying the residue's identifiers (``model``,
           ``strand_id`` and ``residue_id`` are read)
    radius: radius of the neighbourhood to check
    level: Atom, Residue, Chain : one of A/R/C (case-insensitive)
    all_levels: not used by this function

    This is a generator: it yields a single item, the result of one
    neighbour search centred on the first atom of the first residue of the
    strand whose sequence id equals ``resid.residue_id``. Because of that,
    the level check below only runs once the generator is first advanced.
    """
    searchLevel = level.upper()
    if searchLevel not in ['A', 'R', 'C']:
        print('Level has to be one of A R C')
        # NOTE(review): ``Error`` is not defined in the visible code;
        # presumably it is declared elsewhere in the module - confirm.
        raise Error

    strand = struct[resid.model][resid.strand_id]
    # Position [3][1] of a full id is the residue's sequence number.
    matching = [
        residue for residue in strand.child_list
        if residue.get_full_id()[3][1] == resid.residue_id
    ]

    atomCoords = [atom.get_coord() for atom in matching[0].get_atoms()]
    searcher = NeighborSearch(list(struct.get_atoms()))

    # Only the first atom's coordinate is used as the search centre.
    yield searcher.search(atomCoords[0], radius, searchLevel)
Пример #16
0
def get_contacts(atoms=None):
    """Build a pairwise contact map for ``atoms``.

    All atom pairs within ``SEARCH_RADIUS`` are looked up with a neighbour
    search and identified by their serial numbers.

    Returns a tuple ``(contact_map, no_of_native_contacts)``:

    * ``contact_map`` maps every index pair ``(x, y)`` with ``x < y`` (list
      positions in ``atoms``) to 1 when the serial-number pair of those two
      atoms was reported by the search, else 0. It holds n(n-1)/2 entries:
      the diagonal and the symmetric duplicates are left out.
    * ``no_of_native_contacts`` is the number of distinct serial-number
      pairs reported by the search.
    """
    # A None default avoids sharing one mutable list between calls.
    atoms = [] if atoms is None else atoms

    neighbor_search = NeighborSearch(atoms)
    neighbors = neighbor_search.search_all(radius=SEARCH_RADIUS)
    # Serial-number pairs of the atoms found in contact.
    neighbor_pairs = {(first.get_serial_number(), second.get_serial_number())
                      for first, second in neighbors}
    no_of_native_contacts = len(neighbor_pairs)

    serials = [atom.get_serial_number() for atom in atoms]
    n = len(atoms)
    contact_map = {}
    # Start at index 0: beginning at 1 left the first atom out of the map
    # and produced (n-1)(n-2)/2 entries instead of n(n-1)/2.
    for x in range(n - 1):
        for y in range(x + 1, n):
            contact_map[(x, y)] = 1 if (serials[x], serials[y]) in neighbor_pairs else 0

    return contact_map, no_of_native_contacts
def get_backbone_links(struc, backbone_atoms, covlnk, join_models=True):
    """Return the atom pairs that link the main chain.

    For every model, the atoms whose id is listed in ``backbone_atoms`` are
    gathered (for disordered atoms the selected child is taken) and all
    pairs closer than ``covlnk`` are searched. A pair is kept when the two
    atoms are not in the same residue and either ``join_models`` is true or
    their parents pass ``same_model``. Each kept pair is returned as a list
    sorted by serial number. A warning is printed for every model that has
    no backbone atoms.
    """
    # TODO differentiate Protein and NA
    links = []
    for model in struc:
        backbone = [
            atom.selected_child if atom.disordered_flag else atom
            for atom in struc[model.id].get_atoms()
            if atom.id in backbone_atoms
        ]
        if not backbone:
            print("Warning: No backbone atoms defined")
            continue

        for atom_a, atom_b in NeighborSearch(backbone).search_all(covlnk):
            if same_residue(atom_a, atom_b):
                continue
            if join_models or same_model(atom_a.get_parent(), atom_b.get_parent()):
                links.append(
                    sorted([atom_a, atom_b], key=lambda x: x.serial_number))

    return links
def findNeigChains(struct, chainIdL, chainIdR, res2res_dist=6, minContacts=20):
    """Grow the ligand and receptor chain sets with neighbouring chains.

    Starting from the chains ``chainIdL`` (ligand) and ``chainIdR``
    (receptor) of the first model, every group of neighbouring chains
    reported by a chain-level search within ``res2res_dist`` is examined.
    A group counts only if it has at least ``minContacts`` residue pairs
    whose residues sit on chains with different ids (hydrogens and HOH
    residues excluded). Chains of a qualifying group that have not been
    assigned yet join the side whose seed chain is in the group; when both
    seed chains are in the group the side is picked at random.

    Returns ``(ligandChains, receptorChains)``, two sets of chain objects.
    """
    def isHeavy(atom):
        return atom is not None and not atom.name.startswith("H")

    model = struct[0]
    chainGroups = NeighborSearch(
        [atom for atom in model.get_atoms() if isHeavy(atom)]
    ).search_all(res2res_dist, level="C")

    chainL = model[chainIdL]
    chainR = model[chainIdR]
    ligandChains = {chainL}
    receptorChains = {chainR}
    addedChains = ligandChains | receptorChains

    for neigsGroup in chainGroups:
        # Count inter-chain residue contacts within this group, ignoring
        # hydrogens and waters.
        groupAtoms = [
            atom for chain in neigsGroup for atom in chain.get_atoms()
            if isHeavy(atom) and atom.get_parent().resname != "HOH"
        ]
        residuePairs = NeighborSearch(groupAtoms).search_all(res2res_dist,
                                                             level="R")
        numContacts = sum(
            1 for r1, r2 in residuePairs
            if r1.get_parent().get_id() != r2.get_parent().get_id())
        if numContacts < minContacts:
            continue

        hasL = chainL in neigsGroup
        hasR = chainR in neigsGroup
        if not (hasL or hasR):
            continue

        for chain in neigsGroup:
            if chain in addedChains:
                continue
            if hasL and hasR:
                # Touches both seeds: assign the chain by coin flip.
                if bool(random.getrandbits(1)):
                    target = receptorChains
                else:
                    target = ligandChains
            elif hasR:
                target = receptorChains
            else:
                target = ligandChains
            target.add(chain)
            addedChains.add(chain)

    return ligandChains, receptorChains
    def test_neighbor_search(self):
        """NeighborSearch: Find nearby randomly generated coordinates.

        Based on the self test in Bio.PDB.NeighborSearch.
        """
        class RandomAtom:
            # Bare-bones atom: a random coordinate plus its accessor.
            def __init__(self):
                self.coord = 100 * random(3)

            def get_coord(self):
                return self.coord

        for _ in range(20):
            atoms = [RandomAtom() for _ in range(100)]
            ns = NeighborSearch(atoms)
            hits = ns.search_all(5.0)
            self.assertIsInstance(hits, list)
            self.assertGreaterEqual(len(hits), 0)

        # Far away from our random atoms: no level may report a neighbour.
        # The searcher built in the last round is reused.
        far_away = array([250, 250, 250])
        for level in ("A", "R", "C", "M", "S"):
            self.assertEqual([], ns.search(far_away, 5.0, level))
Пример #20
0
def compute_interactions(pdb_name, save_to_db=False):
    """Compute CA-CA distances and classified residue interactions for one entry.

    The database structure matching ``pdb_name`` (lower-cased) is loaded, its
    PDB text is parsed with Biopython, and the first preferred chain of the
    first model is analysed after removing every non-water residue that has
    no generic number in the database.

    Args:
        pdb_name: entry name of the structure's protein; case-insensitive.
        save_to_db: when true, previously stored interacting residue pairs
            of the structure are deleted, water-mediated interactions are
            appended, the classified pairs are saved and ``Distance`` rows
            are bulk-created.

    Returns:
        A tuple ``(classified, distances)``: the interacting pairs that have
        at least one interaction, and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples.
    """

    # Both stages are hard-wired on; the flags only gate the code below.
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB
    # (only the first model of the parsed structure is kept)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # remove residues without GN and only those matching receptor.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    # Lookup tables keyed by sequence number: database residue and its
    # generic-number label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect the ids first and detach afterwards, so the chain is not
    # modified while it is being iterated. Waters (HOH) are always kept.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        # CA-CA value for every unordered pair of non-water residues.
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # (presumably Bio.PDB atom subtraction, i.e. the
                        # distance between the two atoms - confirm)
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only keep pairs whose sequence numbers differ by MORE than
        # NUM_SKIP_RESIDUES (closer pairs are dropped)
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Split unto classified and unclassified.
        # (only the pairs with at least one interaction are kept)
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary
            # Keys look like "<chain id><residue number>_<chain id><residue number>".
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules
            # (a set of the atoms of every HOH residue in the preferred chain)
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                # (water atom, residue) tuples for non-water residues within
                # 3.5 of the water atom that pass is_hba or is_hbd
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    # Only later entries that involve the same water atom.
                    for index_two in [
                            index for index in range(index_one +
                                                     1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            # NOTE(review): a pair created here lives only in
                            # interaction_pairs; the save loop below iterates
                            # `classified`, so it looks like such pairs are
                            # not saved by this function - confirm.
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            # zip stops at the shorter of the two match lists.
                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a +
                                        "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i, d in enumerate(distances):
                # The value is stored as an integer: 100 * distance, truncated.
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush to the database once more than 1000 rows are pending.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left after the last flush.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
Пример #21
0
# Parse the 1UBQ structure from its PDB file.
st = parser.get_structure('1UBQ', '1ubq.pdb')

# Keep the CA atoms only, reporting each one as it is selected.
select = []
for at in st.get_atoms():
    if at.id != 'CA':
        continue
    select.append(at)
    residue = at.get_parent()
    print("ATOM:", residue.get_resname(), residue.id[1], at.id)

# Index the selected atoms for neighbour queries.
nbsearch = NeighborSearch(select)

print("NBSEARCH:")

# Report every pair of selected atoms closer than MAXDIST, numbering the
# contacts from 1.
ncontact = 1
for at1, at2 in nbsearch.search_all(MAXDIST):
    print("Contact: ", ncontact)
    for label, atom in (("at1", at1), ("at2", at2)):
        print(label, atom, atom.get_serial_number(),
              atom.get_parent().get_resname())
    print()
    ncontact += 1
Пример #22
0
def compute_interactions(pdb_name, save_to_db=False):
    """Compute CA-CA distances and residue interactions for one structure.

    The structure is looked up in the database by its lower-cased entry
    name, its stored PDB text is parsed with Biopython, and only the first
    preferred chain of the first model is analysed.  Chain residues whose
    number has no database residue with a generic number are detached,
    except for residues named "HOH".

    Args:
        pdb_name: Entry name of the structure; lower-cased before the lookup.
        save_to_db: When true, the interacting pairs previously stored for
            the structure are deleted, the classified pairs are saved, and
            the distances are bulk-inserted as ``Distance`` rows.

    Returns:
        A ``(classified, distances)`` tuple.  ``classified`` holds the
        ``InteractingPair`` objects that have at least one interaction.
        ``distances`` holds ``(db_res1, db_res2, ca_distance, label1,
        label2)`` tuples, one per unordered pair of non-water residues.
    """
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    def pair_key(res_a, res_b):
        # Key layout: "<chain id><residue number>_<chain id><residue number>"
        return (res_a.get_parent().get_id() + str(res_a.get_id()[1]) + "_"
                + res_b.get_parent().get_id() + str(res_b.get_id()[1]))

    # The lookup below is done with a lowercase entry name
    pdb_name = pdb_name.lower()

    # Fetch the structure record and wrap its PDB text in a file-like object
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Only the first of the comma-separated preferred chains is used
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Parse with Biopython and keep the first model
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    chain = s[preferred_chain]

    # Index the database residues that carry a generic number by sequence number
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for db_residue in residues:
        seq_num = db_residue.sequence_number
        dbres[seq_num] = db_residue
        dblabel[seq_num] = db_residue.generic_number.label

    # Detach chain residues unknown to the index above, but keep "HOH".
    # Ids are collected before detaching so the chain is not modified
    # while it is being iterated.
    ids_to_remove = [res.id for res in chain
                     if res.id[1] not in dbres and res.get_resname() != "HOH"]
    for res_id in ids_to_remove:
        chain.detach_child(res_id)

    if do_distances:
        # Each unordered pair of non-water residues is measured exactly once
        non_water = [res for res in chain if not is_water(res)]
        for pos, res1 in enumerate(non_water):
            for res2 in non_water[pos + 1:]:
                ca_distance = res1['CA'] - res2['CA']
                distances.append((dbres[res1.id[1]], dbres[res2.id[1]],
                                  ca_distance,
                                  dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')
        ns = NeighborSearch(atom_list)
        # Residue-level pairs found by the neighbour search with radius 4.5
        all_neighbors = ns.search_all(4.5, "R")

        # Keep amino-acid pairs that are MORE than NUM_SKIP_RESIDUES apart
        # in residue numbering
        all_aa_neighbors = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
            and abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # Determine the interaction types of every remaining pair
        interactions = [
            InteractingPair(res_a, res_b, get_interactions(res_a, res_b),
                            dbres[res_a.id[1]], dbres[res_b.id[1]], struc)
            for res_a, res_b in all_aa_neighbors
        ]

        # Only pairs with at least one detected interaction are "classified"
        classified = [pair for pair in interactions
                      if len(pair.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Remove what was stored before for this structure
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # Classified pairs addressable by their residue-pair key
            interaction_pairs = {
                pair_key(pair.get_residue_1(), pair.get_residue_2()): pair
                for pair in classified
            }

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            # Atoms of all "HOH" residues in the preferred chain
            water_list = {water_atom
                          for residue in s[preferred_chain]
                          if residue.get_resname() == "HOH"
                          for water_atom in residue.get_atoms()}
            if water_list:
                # (water atom, non-water residue) for residues found within
                # radius 3.5 of each water atom
                water_neighbors = [
                    (water_atom, match_res)
                    for water_atom in water_list
                    for match_res in ns.search(water_atom.coord, 3.5, "R")
                    if not is_water(match_res)
                ]

                # Pair up residues that share the same neighbouring water atom
                for pos, (water_one, res_1) in enumerate(water_neighbors):
                    for water_two, res_2 in water_neighbors[pos + 1:]:
                        if not water_one == water_two:
                            continue
                        key = pair_key(res_1, res_2)

                        # Both residues must have a polar interaction with the
                        # water - NOTE: this is not capturing every angle
                        polar_on_both = (
                            any(get_polar_interactions(water_one.get_parent(), res_1))
                            and any(get_polar_interactions(water_two.get_parent(), res_2))
                        )
                        if not polar_on_both:
                            continue

                        # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                        if key in interaction_pairs:
                            interaction_pairs[key].interactions.append(WaterMediated())
                        else:
                            interaction_pairs[key] = InteractingPair(
                                res_1, res_2, [WaterMediated()],
                                dbres[res_1.id[1]], dbres[res_2.id[1]], struc)

            # NOTE(review): only `classified` is saved; pairs that exist solely
            # in `interaction_pairs` (water-mediated only) are not persisted
            # here - confirm this is intended.
            for pair in classified:
                pair.save_into_database()

        if do_distances:
            # Existing Distance rows for the structure are not deleted first.
            # Rows are inserted in batches; the distance is stored as
            # int(100 * value).
            bulk_distances = []
            for db_res1, db_res2, ca_distance, gn1, gn2 in distances:
                bulk_distances.append(
                    Distance(distance=int(100*ca_distance),
                             res1=db_res1, res2=db_res2,
                             gn1=gn1, gn2=gn2,
                             gns_pair='_'.join([gn1, gn2]),
                             structure=struc))
                if len(bulk_distances) > 1000:
                    Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Flush whatever is left in the last, partial batch
            Distance.objects.bulk_create(bulk_distances)

    return classified, distances
Пример #23
0
def main():
    """Detect polar contacts in a PDB file and rank residue-pair energies.

    Reads its options from the command line (see the argparse definitions
    below), then:

      1. Loads the VDW parameter set (--vdw) and the residue library (--rlib).
      2. Parses the PDB file and keeps only its first model.
      3. Collects polar atoms (backbone only with --backonly) and searches
         for atom pairs closer than HBLNK, discarding pairs in the same
         residue, pairs closer than COVLNK, pairs whose residue numbers
         differ by exactly 1 and, with --nowats, pairs involving water.
      4. Prints the contacts sorted by the serial number of the first atom.
      5. For every distinct residue pair, sums an electrostatic term and a
         12-6 Lennard-Jones term over all atom pairs, prints them, and
         finally prints the five pairs with the lowest total energy.

    Exits with status 2 when no PDB path is given or the file cannot be
    opened.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')

    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')

    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')

    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')

    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')

    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')

    parser.add_argument('pdb_path')

    args = parser.parse_args()

    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # Separate name so the argparse parser above is not shadowed
    pdb_parser = PDBParser(PERMISSIVE=1)

    try:
        st = pdb_parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")

    # Using Model 0 any way
    st = st[0]

    # Making a list of polar atoms
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # Remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
             or at2.get_parent().get_resname() in waternames:
                continue

        # Store each pair with the lower serial number first
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))

    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    list5 = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        res1, res2 = rpair
        # Residue names are taken from the residues themselves, not from
        # inside the atom loops, so the labels printed below are always
        # those of the current pair even if a residue yields no atoms.
        resid1 = res1.get_resname()
        resid2 = res2.get_resname()
        eint = 0.
        evdw = 0.
        for at1 in res1.get_atoms():
            atparam1 = aaLib.getParams(resid1, at1.id)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in res2.get_atoms():
                atparam2 = aaLib.getParams(resid2, at2.id)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                # Interatomic distance, computed once per atom pair
                dist = at1 - at2
                # Electrostatic term: 332.16 * q1 * q2 / (diel * r)
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / dist
                # Lennard-Jones 12-6 term with geometric-mean eps and sig
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                ratio = sig / dist
                evdw = evdw + 4 * eps * (ratio**12 - ratio**6)
        list5.append((resid1, res1.id[1], resid2, res2.id[1], eint, evdw,
                      eint + evdw))  # list all

        # eint = electrostatic, evdw = van der Waals, eint + evdw = total
        print(resid1, res1.id[1], resid2, res2.id[1], eint, evdw,
              eint + evdw)

    # The five residue pairs with the lowest total energy
    for element in sorted(list5, key=lambda li: li[6])[:5]:
        print(element)
Пример #24
0
st = parser.get_structure('1UBQ', args.pdb_file)

# Collect the atoms whose element is in POLAR.  Hetero residues (id flag
# starting with 'H_') and waters (flag starting with 'W') are skipped unless
# args.hetatm is set.  Both flags are tested in a single startswith() call:
# written as `A or B and not C`, `and` binds tighter than `or`, which made
# hetero residues be skipped even when args.hetatm was set.
polar_ats = []

for at in st.get_atoms():
    res = at.get_parent()
    if res.id[0].startswith(('H_', 'W')) and not args.hetatm:
        continue
    if at.element in POLAR:
        polar_ats.append(at)

print (len(polar_ats), 'POLAR Atoms found')

# Preparing search
nbsearch = NeighborSearch(polar_ats)

# All pairs of polar atoms closer than HBLNK
at_pairs =  nbsearch.search_all(HBLNK)

hbs = {}

# Pairs are visited in order of the first atom's serial number
for at1, at2 in sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number):
    res1 = at1.get_parent()
    res2 = at2.get_parent()
    # remove atom pairs from the same residue and next in sequence
    # NOTE(review): the difference is signed, so every pair whose second
    # residue has the lower number is skipped as well - confirm intended.
    if res2.id[1] - res1.id[1] < 2:
        continue
    # using the contact with the shortest distance between residues
    dist = at1 - at2
    if res1 not in hbs:
        hbs[res1] = {}
Пример #25
0
    def find_ligands(self, radius=3):
        """
        Look through the parent structure for hetero components near the
        catalytic residues and add them to this site as Het objects.

        The search is centred on the points returned by
        ``_get_ligand_search_centers`` (atoms of catalytic residues and the
        dummy atoms between distant residues).  Nothing is done when the
        parent structure is not exactly of type ``Structure``.

        Args:
            radius: the search space (in Å) around the atoms of the catalytic residues
        """
        if type(self.parent_structure) != Structure:
            return

        search_centers = self._get_ligand_search_centers(radius)

        # KD tree over every atom of the parent structure
        ns = NeighborSearch(
            Bio.PDB.Selection.unfold_entities(self.parent_structure, 'A'))

        polymers = defaultdict(list)   # chain id -> residues of other chains
        site_chains = {res.chain for res in self}
        seen = set()                   # full ids of residues already visited
        added = set()                  # full ids of residues added as Het

        # Components close to the catalytic residues
        for center in search_centers:
            for res in ns.search(center, radius, level='R'):
                full_id = res.get_full_id()
                if full_id in seen:
                    continue
                seen.add(full_id)

                # First character of the hetero flag: 'W', 'H' or ' '
                restype = res.get_id()[0][0]
                if restype == 'W':
                    # Waters are ignored
                    continue

                chain = res.get_parent().get_id()
                if restype == 'H':
                    # HET component: add it directly
                    het = Het(self.mcsa_id,
                              self.pdb_id,
                              res.get_resname(),
                              res.get_id()[1],
                              chain,
                              structure=res,
                              parent_site=self)
                    self.add(het)
                    added.add(full_id)
                elif restype == ' ' and chain not in site_chains:
                    # Polymer residue from a chain the site does not use
                    polymers[chain].append(res)

        # One polymer Het per collected chain, only for polymer-acting sites
        if self.acts_on_polymer:
            for chain, reslist in polymers.items():
                polymer = Het.polymer(reslist, self.mcsa_id, self.pdb_id,
                                      chain, self)
                self.add(polymer)

        # Distal co-factor-like or substrate-like molecules: hetero residues
        # within radius 30 of the geometric centre of self.structure that
        # were not added above
        geometric_center = self.structure.center_of_mass(geometric=True)
        for res in ns.search(geometric_center, 30, level='R'):
            if res.get_id()[0][0] != 'H' or res.get_full_id() in added:
                continue
            ligand = Het(self.mcsa_id,
                         self.pdb_id,
                         res.get_resname(),
                         res.get_id()[1],
                         res.get_parent().get_id(),
                         structure=res,
                         parent_site=self)
            if ligand.type in ('Substrate (non-polymer)',
                               'Co-factor (non-ion)'):
                ligand.is_distal = True
                self.add(ligand)
Пример #26
0
#
# Simple program to search contacts

from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.PDBParser import PDBParser

HBLNK = 3.5  # Distance cutoff below which two atoms count as a contact

parser = PDBParser(PERMISSIVE=1)

st = parser.get_structure('estructura', '1ubq.pdb')

# Select every atom of the structure and echo each of them
selecc = list(st.get_atoms())
for at in selecc:
    print("ATOM:", at)

# Neighbour-search index over all selected atoms
nbsearch = NeighborSearch(selecc)

print("NBSEARCH:")
# Report every atom pair closer than HBLNK, numbering contacts from 1
ncontact = 1
for at1, at2 in nbsearch.search_all(HBLNK):
    print("Contact: ", ncontact)
    for tag, atom in (("at1", at1), ("at2", at2)):
        print(tag, atom, atom.get_serial_number(),
              atom.get_parent().get_resname())
    print()
    ncontact += 1
Пример #27
0
print('Parsing', args.pdb_file)

# Load the structure, using the path of the PDB file handle
st = parser.get_structure('STR', args.pdb_file.name)

# Gather the atoms whose element is listed in POLAR_ELEMENTS
polar_atoms = [at for at in st.get_atoms() if at.element in POLAR_ELEMENTS]

print(len(polar_atoms), 'polar Atoms found')

# Index the polar atoms and list every pair within args.hb_max_dist
nbsearch = NeighborSearch(polar_atoms)
at_pairs = nbsearch.search_all(args.hb_max_dist)

# Output sorted by atom,serial_number, nbsearch returns ordered pairs
# Redirect the output with > output_list
for at1, at2 in sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number):
    ad = True
    da = True
    if args.check:
        da, ad = check_hb(at1, at2)
    if da and ad:  # classification not requested or not predictable
        print('{:11}  : {:11}  {:8.3f}'.format(atom_id(at1), atom_id(at2),
                                               at1 - at2))
    elif da:  # Donor - Acceptor
        print('{:11}D : {:11}A {:8.3f}'.format(atom_id(at1), atom_id(at2),
Пример #28
0
def main():
    """Command-line entry point: list the polar contacts of a PDB file.

    Parses the options, loads the first model of the PDB file, collects
    polar atoms (backbone only with --backonly) and prints every atom pair
    closer than HBLNK that is not in the same residue, not closer than
    COVLNK, not in residues whose numbers differ by exactly 1 and, with
    --nowats, not involving water.  Exits with status 2 when no PDB path
    is given or the file cannot be opened.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly', action='store_true', dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats', action='store_true', dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('pdb_path')

    args = parser.parse_args()

    # Echo the effective settings
    print("Settings")
    print("--------")
    for option, value in vars(args).items():
        print('{:10}:'.format(option), value)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    pdb_parser = PDBParser(PERMISSIVE=1)
    try:
        st = pdb_parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Only the first model is analysed; warn when there are more
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    st = st[0]

    # Polar atoms, selected by atom name
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Contacts under HBLNK between different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        if res1 == res2:
            continue
        # Discard covalently linked atoms and sequence neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(res2.id[1] - res1.id[1]) == 1:
            continue
        # Optionally drop contacts that involve water
        if nowats and (res1.get_resname() in waternames
                       or res2.get_resname() in waternames):
            continue
        # Wrap both atoms, the one with the lower serial number first
        if at1.get_serial_number() < at2.get_serial_number():
            first, second = at1, at2
        else:
            first, second = at2, at1
        hblist.append([StructureWrapper.Atom(first, 1),
                       StructureWrapper.Atom(second, 1)])

    print()
    print("Polar contacts")
    # NOTE(review): the header names seven columns (type and charge
    # included) while each row below prints only three values.
    header = '{:13} ({:4}, {:6}) {:13} ({:4}, {:6}) {:6} '
    print(header.format('Atom1', 'Type', 'Charge',
                        'Atom2', 'type', 'charge', 'Dist (A)'))

    # Rows ordered by the serial number of the first atom
    for first, second in sorted(hblist,
                                key=lambda pair: pair[0].at.get_serial_number()):
        print('{:14} {:14} {:6.3f} '.format(first.atid(), second.atid(),
                                            first.at - second.at))
Пример #29
0
# Echo every parsed command-line option
for name, value in vars(args).items():
    print('{:10}:'.format(name), value)

print("PDB.filename:", args.pdb_file.name)

parser = PDBParser(PERMISSIVE=1)

print('Parsing', args.pdb_file)

# Load the structure, using the path of the PDB file handle
st = parser.get_structure('STR', args.pdb_file.name)

# Keep only the atoms named CA
ca_atoms = [at for at in st.get_atoms() if at.id == 'CA']

print(len(ca_atoms), 'CA Atoms found')

# Index the CA atoms and list every pair within args.max_dist
nbsearch = NeighborSearch(ca_atoms)
at_pairs = nbsearch.search_all(args.max_dist)

# Print the pairs ordered by the serial number of their first atom.
# Redirect the output with > output_list
ordered_pairs = sorted(at_pairs, key=lambda pair: pair[0].serial_number)
for at1, at2 in ordered_pairs:
    print(atom_id(at1), ":", atom_id(at2), at1 - at2)
Пример #30
0
        coo_list.append(coo)

    return res_list, np.concatenate(coo_list).reshape((-1, 3))


# %%
# PDB entry and chain id to process
p, c = "4fae", "B"
parser = PDBParser()
pdb_file = f'{p}.pdb'
if PreProcess.download_pdb(p, pdb_file):
    structure = parser.get_structure('a', pdb_file)
    chain = structure[0][c]

    res_list, coo_list = get_residue_feature(chain)

    # Residue-level pairs found by the neighbour search with radius 3.3
    atom_list = list(chain.get_atoms())
    ns = NeighborSearch(atom_list)
    ns_list = ns.search_all(3.3, level='R')

    # Edges are [i, j, 1], with i and j the residue numbers minus one.
    # Only pairs with i < j - 1 and j < l (the length of res_list) are kept.
    l = len(res_list)
    edge = []
    for res_a, res_b in ns_list:
        i = res_a.get_id()[1] - 1
        j = res_b.get_id()[1] - 1
        if j < l and i < j - 1:
            edge.append([i, j, 1])

#%%
mod = 'train'
out = {'train': {}, 'test':{}, 'valid':{}}
out[mod]['edge'] = 


def main():
    """Detect polar contacts in a PDB file and report residue-pair energies.

    Command-line options (see the argparse definitions below) choose the
    atoms considered, the dielectric constant, the parameter files and the
    optional bar plot (--plot), top-N listing (--index) and solvent
    accessibility output (--surf).

    Steps:
      1. Load the VDW parameter set and the residue library, parse the PDB
         file and keep its first model.
      2. With --surf, run NACCESS at residue and at atom level.
      3. Collect polar atoms and search for pairs closer than HBLNK,
         discarding same-residue pairs, pairs closer than COVLNK, residues
         whose numbers differ by exactly 1 and, with --nowats, waters.
      4. Print the contacts (with per-atom accessibility when --surf),
         optionally plot main-chain / side-chain contact counts per residue.
      5. Print electrostatic, VDW and total energy for each residue pair,
         the --index lowest-energy pairs, and, with --surf, the energies
         recomputed with a dielectric of 80 for exposed pairs.

    Exits with status 2 when no PDB path is given or the file cannot be
    opened.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')

    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    # Plot the number of polar contacts that each residue has
    parser.add_argument(
        '--plot',
        action='store_true',
        dest='plotMode',
        help='Plot the number of polar contacts per residue')
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')

    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')

    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')

    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')

    parser.add_argument(
        '--index',
        action='store',
        dest='index',
        help='Only select the molecules with more stable contact by an index')

    parser.add_argument(
        '--surf',
        action='store_true',
        dest='surf',
        help='Usa ASA',
    )

    parser.add_argument('pdb_path')

    args = parser.parse_args()

    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    plotMode = args.plotMode
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    index = args.index
    surf = args.surf
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # Separate name so the argparse parser above is not shadowed
    pdb_parser = PDBParser(PERMISSIVE=1)

    try:
        st = pdb_parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")

    # Using Model 0 any way
    st = st[0]

    # Getting surfaces.  The calls are made for their effect on the model:
    # the accessibility values are read back below from the 'EXP_NACCESS'
    # entry of each atom's / residue's `xtra` dict (presumably these are the
    # Bio.PDB.NACCESS classes - TODO confirm).
    if surf:
        naccess_binary = (
            '/Users/daniel/Downloads/BioPhysics-energies0/NACCESS/naccess')
        NACCESS(st, naccess_binary=naccess_binary)
        NACCESS_atomic(st, naccess_binary=naccess_binary)
        print("Surfaces obtained from NACCESS")

    # Making a list of polar atoms
    selected_atoms = backbone_polars if backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # Remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                or at2.get_parent().get_resname() in waternames:
                continue

        # Store the raw atoms, the one with the lower serial number first
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))

    # Per-residue counters for the plot, keyed by the residue number of the
    # first atom of each contact.  All three dicts always share the same
    # keys in the same (first-seen) order, so bars and tick labels line up.
    maincontacts = {}
    sidecontacts = {}
    tick_labels = {}

    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        label1 = r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id
        label2 = r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id
        dist = hb[0] - hb[1]
        print('{:14} {:14} {:6.3f} '.format(label1, label2, dist))
        if surf:
            # hblist holds raw atoms, so the accessibility is read from the
            # atoms' own `xtra` dict rather than through a wrapper object
            print('{:14} ({:6.3f}) {:14} ({:6.3f}) {:6.3f} '.format(
                label1, float(hb[0].xtra['EXP_NACCESS']),
                label2, float(hb[1].xtra['EXP_NACCESS']), dist))
        if plotMode:
            resnum = r1.id[1]
            maincontacts.setdefault(resnum, 0)
            sidecontacts.setdefault(resnum, 0)
            tick_labels.setdefault(resnum, str(resnum) + r1.get_resname())
            # A contact counts as main-chain only when both atoms are
            # backbone polar atoms; otherwise it counts as side-chain
            if hb[0].id in backbone_polars and hb[1].id in backbone_polars:
                maincontacts[resnum] += 1
            else:
                sidecontacts[resnum] += 1

    if plotMode:
        # Bar graph of the number of polar contacts of each residue:
        # main-chain contacts in blue, side-chain contacts in red
        nindex = np.arange(len(maincontacts))
        main_con = list(maincontacts.values())
        side_con = list(sidecontacts.values())
        fig, ax = plt.subplots()
        bar_width = 0.35

        ax.bar(nindex,
               main_con,
               bar_width,
               color='b',
               label='Mainchain contacts')
        ax.bar(nindex + bar_width,
               side_con,
               bar_width,
               color='r',
               label='Sidechain contacts')
        ax.set_xlabel('AminoAcid Number')
        ax.set_ylabel('Number of polar contacts')
        # Ticks sit between the two bars of each residue
        ax.set_xticks(nindex + bar_width / 2)
        ax.set_xticklabels(list(tick_labels.values()))
        ax.legend()
        fig.tight_layout()
        plt.show()

    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    # NOTE(review): new Residue wrappers are built for every contact, so the
    # membership test only removes duplicates if Residue defines equality -
    # confirm against the Residue class.
    respairs = []
    for hb in hblist:
        r1 = Residue(hb[0].get_parent(), 1, aaLib, vdwParams)
        r2 = Residue(hb[1].get_parent(), 1, aaLib, vdwParams)
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    # One row per residue pair:
    # [resid1, residue1, resid2, residue2, eint, evdw, total]
    energies = []
    for rpair in sorted(respairs, key=lambda i: i[0].resNum()):
        eint = rpair[0].elecInt(rpair[1], diel)
        evdw = rpair[0].vdwInt(rpair[1])
        print('{:10} {:10} {: 8.4f} {: 8.4f} {: 8.4f}'.format(
            rpair[0].resid(), rpair[1].resid(), eint, evdw, eint + evdw))
        energies.append([
            rpair[0].resid(), rpair[0], rpair[1].resid(), rpair[1], eint, evdw,
            eint + evdw
        ])

    if index is not None:
        # Only the `index` pairs with the lowest total energy (the most
        # stable ones); a negative value selects nothing
        limit = max(int(index), 0)
        for element in sorted(energies, key=lambda i: i[6])[:limit]:
            print(element)

    if surf:
        # Surface-corrected energies for EVERY residue pair, in the same
        # order as the table above (safe when there are no pairs at all)
        for _, res1, _, res2, _, evdw, _ in energies:
            srfr1 = float(res1.residue.xtra['EXP_NACCESS']['all_polar_rel'])
            srfr2 = float(res2.residue.xtra['EXP_NACCESS']['all_polar_rel'])
            # Define 30% threshold for buried: above it for either residue,
            # a dielectric of 80 is used instead of --diel
            if (srfr1 > 30.) or (srfr2 > 30.):
                diel0 = 80.0
            else:
                diel0 = diel
            eint = res1.elecInt(res2, diel0)
            print(
                '{:10} ({:>8.3f}) {:10}  ({:>8.3f}) (e: {:4.1f}) {:>8.4f} {:>8.4f} {:>8.4f}'
                .format(res1.resid(), srfr1, res2.resid(), srfr2, diel0,
                        eint, evdw, eint + evdw))
def _get_contacts(ats_list, d_cutoff):
    """List the contacts among the given atoms.

    Args:
        ats_list: Atoms to index with NeighborSearch.
        d_cutoff: Radius passed to ``search_all``.

    Returns:
        A list of ``(residue1, residue2, distance)`` tuples, one per atom
        pair returned by the search; the residues are the atoms' parents
        and the distance is ``atom1 - atom2``.
    """
    atom_pairs = NeighborSearch(ats_list).search_all(d_cutoff)
    return [(first.get_parent(), second.get_parent(), first - second)
            for first, second in atom_pairs]
Пример #33
0
def compute_interactions(pdb_name, save_to_db=False):
    """Compute residue distances and interactions for one stored structure.

    The ``Structure`` whose protein entry name equals the lower-cased
    ``pdb_name`` is loaded, its stored PDB text is parsed, and the work is
    done on the first model and on the first chain listed in the structure's
    ``preferred_chain``. Residues of that chain whose number has no database
    residue with a generic number are detached first (residues named "HOH"
    are kept).

    Three stages run, each guarded by a flag that is hard-coded to ``True``:

    * distances    -- CA-CA distance for every unordered pair of non-water
                      residues of the chain.
    * interactions -- residue pairs from a neighbour search with radius 6.6,
                      limited to amino acids more than ``NUM_SKIP_RESIDUES``
                      apart in residue number; a pair is "classified" when
                      ``get_interactions()`` is non-empty.
    * complexes    -- only when a ``SignprotComplex`` exists for the
                      structure: residue pairs between the preferred chain and
                      chain ``complex.alpha`` found within a radius of 4.5.

    Args:
        pdb_name: entry name of the structure; it is lower-cased before use.
        save_to_db: when true, the classified pairs, the classified complex
            pairs and the distances are written to the database. Existing
            ``InteractingResiduePair`` rows for the structure are deleted
            beforehand.

    Returns:
        ``(classified, distances)``. ``classified`` is the list of
        ``InteractingPair`` objects with at least one interaction and
        ``distances`` is a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples. The complex
        pairs are not part of the return value.

    ``Structure.DoesNotExist`` is not handled here; only the two lookups in
    the complexes stage are wrapped in ``try``/``except``.
    """

    # Stage switches; all three are always on in this version.
    do_distances = True
    do_interactions = True
    do_complexes = True
    distances = []
    classified = []
    classified_complex = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB; [0] selects the first model
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Keep only database residues that carry a generic number (GN) and index
    # them (and their GN labels) by sequence number.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect the ids first and detach afterwards, so the chain is not
    # modified while it is being iterated. Waters ("HOH") are never detached.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        # The i2 > i1 test visits every unordered pair exactly once.
        # NOTE(review): res['CA'] presumably raises KeyError for a residue
        # without a CA atom -- confirm such residues cannot reach this point.
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        # (`ns` is reused further down for the water search when saving)
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Keep only the pairs for which at least one interaction was found.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if do_complexes:
        try:
            # check if structure in signprot_complex
            # NOTE(review): the local name `complex` shadows the builtin.
            complex = SignprotComplex.objects.get(structure=struc)

            # Get all GPCR residue atoms based on preferred chain
            gpcr_atom_list = [ atom for residue in Selection.unfold_entities(s[preferred_chain], 'R') if is_aa(residue) \
                            for atom in residue.get_atoms()]

            # Get all residue atoms from the coupled protein (e.g. G-protein)
            # NOW: select alpha subunit protein chain using complex model
            sign_atom_list = [ atom for residue in Selection.unfold_entities(s[complex.alpha], 'R') if is_aa(residue) \
                                for atom in residue.get_atoms()]

            ns_gpcr = NeighborSearch(gpcr_atom_list)
            ns_sign = NeighborSearch(sign_atom_list)

            # For each GPCR atom perform the neighbor search on the signaling protein
            # (a set, so each residue pair appears once)
            all_neighbors = {
                (gpcr_atom.parent, match_res)
                for gpcr_atom in gpcr_atom_list
                for match_res in ns_sign.search(gpcr_atom.coord, 4.5, "R")
            }

            # Database residues of the signaling protein; its entry name is
            # built as "<pdb_name>_<alpha chain, lower-cased>".
            residues_sign = ProteinConformation.objects.get(
                protein__entry_name=pdb_name + "_" +
                complex.alpha.lower()).residue_set.exclude(
                    generic_number=None).all().prefetch_related(
                        'generic_number')

            # grab labels from sign protein
            dbres_sign = {}
            dblabel_sign = {}
            for r in residues_sign:
                dbres_sign[r.sequence_number] = r
                dblabel_sign[r.sequence_number] = r.generic_number.label

            # Find interactions; pairs with a residue that has no database
            # counterpart on either side are skipped.
            interactions = [
                InteractingPair(res_pair[0], res_pair[1],
                                dbres[res_pair[0].id[1]],
                                dbres_sign[res_pair[1].id[1]], struc)
                for res_pair in all_neighbors if res_pair[0].id[1] in dbres
                and res_pair[1].id[1] in dbres_sign
            ]

            # Filter unclassified interactions
            classified_complex = [
                interaction for interaction in interactions
                if len(interaction.get_interactions()) > 0
            ]

            # Convert to dictionary for water calculations
            # Key format: "<chain id><res number>_<chain id><res number>"
            interaction_pairs = {}
            for pair in classified_complex:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # Obtain list of all water molecules in the structure
            # (atoms of every "HOH" residue, over all chains of the model)
            water_list = {
                water
                for chain in s for residue in chain
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            # If waters are present calculate water-mediated interactions
            # NOTE(review): the two neighbour sets built below are not read by
            # any active code; their only consumer is the commented-out block.
            if len(water_list) > 0:
                ## Iterate water molecules over coupled and gpcr atom list
                water_neighbors_gpcr = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_gpcr.search(water.coord, 3.5, "R")
                }

                water_neighbors_sign = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_sign.search(water.coord, 3.5, "R")
                }

                # TODO: DEBUG AND VERIFY this code as water-mediated interactions were present at this time
                # 1. UPDATE complexes to include also mini Gs and peptides (e.g. 4X1H/6FUF/5G53)
                # 2. Run and verify water-mediated do_interactions
                # 3. Improve the intersection between the two hit lists

                ## TODO: cleaner intersection between hits from the two Lists
                # see new code below
#                for water_pair_one in water_neighbors_gpcr:
#                    for water_pair_two in water_neighbors_sign:
#                        if water_pair_one[0]==water_pair_two[0]:
#                            res_1 = water_pair_one[1]
#                            res_2 = water_pair_two[1]
#                            key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

# Check if interaction is polar
#                            if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
# TODO Check if water interaction is already present (e.g. multiple waters)
# TODO Is splitting of sidechain and backbone-mediated interactions desired?
#                                if not key in interaction_pairs:
#                                    interaction_pairs[key] = InteractingPair(res_1, res_2, dbres[res_1.id[1]], dbres_sign[res_2.id[1]], struc)

# TODO: fix assignment of interacting atom labels (now seems limited to residues)
#                                interaction_pairs[key].interactions.append(WaterMediated(a + "|" + str(water_pair_one[0].get_parent().get_id()[1]), b))

        except SignprotComplex.DoesNotExist:
            #            print("No complex definition found for", pdb_name)
            # NOTE(review): `log` is assigned but never read in this function,
            # so a missing complex definition passes silently.
            log = "No complex definition found for " + pdb_name
        except ProteinConformation.DoesNotExist:
            print(
                "No protein conformation definition found for signaling protein of ",
                pdb_name)
#            log = "No protein conformation definition found for signaling protein of " + pdb_name

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary
            # (rebinds `interaction_pairs`; the values are the same objects
            # that are held in `classified`)
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                # Keep non-water residues within 3.5 of a water atom that
                # pass is_hba() or is_hbd().
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    # Only later entries that refer to the same water atom
                    for index_two in [
                            index for index in range(index_one +
                                                     1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            # NOTE(review): a pair created here lives only in
                            # `interaction_pairs`; the save loop below walks
                            # `classified`, so such a pair does not appear to
                            # be saved -- confirm whether that is intended.
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            # zip() stops at the shorter of the two match lists
                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a +
                                        "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            for p in classified:
                p.save_into_database()

        if do_complexes:
            for pair in classified_complex:
                pair.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            # Distances are stored as int(100 * value) and written in batches:
            # the buffer is flushed once it holds more than 1000 objects and
            # the remainder is written after the loop.
            # NOTE(review): `pairs` (the bulk_create result) and the loop
            # index `i` are never read.
            bulk_distances = []
            for i, d in enumerate(distances):
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            pairs = Distance.objects.bulk_create(bulk_distances)
    return classified, distances
Пример #34
0
parser = PDBParser(PERMISSIVE=1)

print('Parsing', args.pdb_file)

# Load the structure from the PDB file (or PDB file handle)
st = parser.get_structure('STR', args.pdb_file.name)

# Candidate atoms: every atom whose id is listed in PEP_BOND_ATS
bck_atoms = [at for at in st.get_atoms() if at.id in PEP_BOND_ATS]

print(len(bck_atoms), 'candidate atoms found')

# Neighbour search restricted to the candidate atoms
nbsearch = NeighborSearch(bck_atoms)

# All candidate pairs lying within COVLNK of each other
at_pairs = nbsearch.search_all(COVLNK)

# Report the pairs sorted by the serial number of their first atom
# (the original author notes that nbsearch returns ordered pairs).
# Redirect the output with "> output_list" to keep it.
for at1, at2 in sorted(at_pairs, key=lambda pair: pair[0].serial_number):
    # Pairs whose atoms belong to the same residue are not reported
    if at1.get_parent() != at2.get_parent():
        print('{:11} : {:11} {:8.3f}'.format(
            atom_id(at1), atom_id(at2), at1 - at2))