Пример #1
0
    def test_neighbor_search(self):
        """NeighborSearch: Find nearby randomly generated coordinates.

        Based on the self test in Bio.PDB.NeighborSearch.

        Twenty rounds each build 100 stand-in atoms, index them and run
        ``search_all(5.0)``; the result must be a list. Afterwards the index
        from the last round is queried at a point far from every atom, once
        per level code, and every query must come back empty.
        """

        # This comment stops black style adding a blank line here, which causes flake8 D202.
        class RandomAtom:
            """Stand-in atom exposing only ``coord`` and ``get_coord()``."""

            def __init__(self):
                self.coord = 100 * random(3)

            def get_coord(self):
                return self.coord

        for _ in range(20):
            atoms = [RandomAtom() for _ in range(100)]
            ns = NeighborSearch(atoms)
            hits = ns.search_all(5.0)
            # Dedicated assertions report the offending value themselves,
            # unlike assertTrue(<bool expression>).
            self.assertIsInstance(hits, list)
            self.assertGreaterEqual(len(hits), 0)
        x = array([250, 250, 250])  # Far away from our random atoms
        self.assertEqual([], ns.search(x, 5.0, "A"))
        self.assertEqual([], ns.search(x, 5.0, "R"))
        self.assertEqual([], ns.search(x, 5.0, "C"))
        self.assertEqual([], ns.search(x, 5.0, "M"))
        self.assertEqual([], ns.search(x, 5.0, "S"))
Пример #2
0
def getLigandNbrs(resids: List[Residue],
                  struct: Structure) -> List[ResidueDict]:
    """Collect the residues of ``struct`` that lie next to a ligand.

    The ligand is given as the list of residues that constitute it. For each
    of them the first entry of its ``child_list`` is used as a probe atom
    (always the first one, not a random pick), and every residue returned by
    a residue-level ``NeighborSearch.search`` with radius 5 around that probe
    is collected. Hits that compare equal, as ``ResidueDict``, to one of the
    ligand's own residues are dropped. The remainder is de-duplicated through
    a ``set`` and each unique entry is passed through ``addBanClass``.

    Args:
        resids: Residues making up the ligand.
        struct: Structure whose atoms are indexed for the search.

    Returns:
        One ``addBanClass`` result per unique neighbouring residue. The order
        follows set iteration and is therefore not meaningful.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is used as search centre.
        probe_atom = r.child_list[0]
        nbrs.extend(ns.search(probe_atom.get_coord(), 5, level='R'))

    # Build the ligand's own keys once instead of once per comparison.
    ligand_keys = [ResidueDict(constit) for constit in resids]

    # Filter out the residues that constitute the ligand itself.
    filtered = []
    for neighbor in nbrs:
        neighbor_key = ResidueDict(neighbor)
        if not any(ligand_key == neighbor_key for ligand_key in ligand_keys):
            filtered.append(neighbor_key)

    return [addBanClass(x) for x in set(filtered)]
Пример #3
0
 def test_neighbor_search(self):
     """NeighborSearch: Find nearby randomly generated coordinates.

     Based on the self test in Bio.PDB.NeighborSearch.

     Twenty rounds each build 100 stand-in atoms, index them and run
     ``search_all(5.0)``; the result must be a list. Afterwards the index
     from the last round is queried at a point far from every atom, once
     per level code, and every query must come back empty.
     """
     class RandomAtom:
         """Stand-in atom exposing only ``coord`` and ``get_coord()``."""

         def __init__(self):
             self.coord = 100 * random(3)

         def get_coord(self):
             return self.coord

     for _ in range(20):
         atoms = [RandomAtom() for _ in range(100)]
         ns = NeighborSearch(atoms)
         hits = ns.search_all(5.0)
         # Dedicated assertions report the offending value themselves,
         # unlike assertTrue(<bool expression>).
         self.assertIsInstance(hits, list)
         self.assertGreaterEqual(len(hits), 0)
     x = array([250, 250, 250])  # Far away from our random atoms
     self.assertEqual([], ns.search(x, 5.0, "A"))
     self.assertEqual([], ns.search(x, 5.0, "R"))
     self.assertEqual([], ns.search(x, 5.0, "C"))
     self.assertEqual([], ns.search(x, 5.0, "M"))
     self.assertEqual([], ns.search(x, 5.0, "S"))
    def test_neighbor_search(self):
        """NeighborSearch: index random coordinates and query them.

        Modelled on the self test in Bio.PDB.NeighborSearch: repeated rounds
        of random stand-in atoms are indexed and searched, then a point far
        from all of them is queried with every level code and must yield
        nothing.
        """
        class RandomAtom:
            """Stand-in atom exposing only ``coord`` and ``get_coord()``."""

            def __init__(self):
                self.coord = 100 * random(3)

            def get_coord(self):
                return self.coord

        rounds = 20
        atoms_per_round = 100
        for _ in range(rounds):
            searcher = NeighborSearch(
                [RandomAtom() for _ in range(atoms_per_round)])
            pairs = searcher.search_all(5.0)
            self.assertIsInstance(pairs, list)
            self.assertGreaterEqual(len(pairs), 0)

        # Far away from our random atoms; the searcher from the last round is
        # reused for these queries.
        distant_point = array([250, 250, 250])
        for level_code in ("A", "R", "C", "M", "S"):
            self.assertEqual([], searcher.search(distant_point, 5.0, level_code))
Пример #5
0
def getLigandNbrs(resids: List[Residue], struct: Structure):
    """Collect the residues of ``struct`` that lie next to a ligand.

    The ligand is given as the list of residues that constitute it. For each
    of them the first entry of its ``child_list`` is used as a probe atom, and
    every residue returned by a residue-level ``NeighborSearch.search`` with
    radius 5 around that probe is collected. Hits whose ``ResidueId`` equals
    that of one of the ligand's own residues are dropped. The remainder is
    de-duplicated through a ``set`` and each unique id is passed through
    ``addBanClass``.

    Args:
        resids: Residues making up the ligand.
        struct: Structure whose atoms are indexed for the search.

    Returns:
        One ``addBanClass`` result per unique neighbouring residue id. The
        order follows set iteration and is therefore not meaningful.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is used as search centre.
        resatom = r.child_list[0]
        # A level='R' search already returns residues, so keep each hit as it
        # is. Star-unpacking a hit would iterate the residue (for a Bio.PDB
        # entity that yields its child atoms) and hand atoms to the
        # residue-identity filter below.
        nbrs.extend(ns.search(resatom.get_coord(), 5, level='R'))

    # Build the ligand's own ids once instead of once per comparison.
    ligand_ids = [ResidueId(constit) for constit in resids]

    # Filter out the residues that constitute the ligand itself.
    filtered = []
    for neighbor in nbrs:
        neighbor_id = ResidueId(neighbor)
        if not any(ligand_id == neighbor_id for ligand_id in ligand_ids):
            filtered.append(neighbor_id)

    return [addBanClass(x) for x in set(filtered)]
Пример #6
0
def getResidueNeighbors(struct:Structure, resid: ResidueFullIdDict, radius:float, level:str = 'R', all_levels=False)-> List[Residue] or List[Chain] or List[Chain or Residue]:

    """
    Yield the entities of ``struct`` found around one residue.

    This is a generator function: nothing below runs (including the level
    check) until the returned generator is advanced, and it produces exactly
    one item, namely the list returned by ``NeighborSearch.search``.

    struct: opened cif structure
    resid: identifiers of the residue (``model``, ``strand_id`` and
           ``residue_id`` are read)
    radius: radius of the neighbourhood to check
    level: Atom, Residue, Chain : one of A/R/C (case-insensitive)
    all_levels: accepted but not used by this function

    The search is centred on the first atom of the first residue in the
    addressed chain whose ``get_full_id()[3][1]`` equals ``resid.residue_id``.
    """

    search_level = level.upper()
    if search_level not in ('A', 'R', 'C'):
        print('Level has to be one of A R C')
        # NOTE(review): ``Error`` is not defined in the visible code; confirm
        # it is imported or defined elsewhere in the module.
        raise Error

    strand: Chain = struct[resid.model][resid.strand_id]
    matching: List[Residue] = [
        child for child in strand.child_list
        if child.get_full_id()[3][1] == resid.residue_id
    ]

    # Coordinates are collected for every atom, but only the first is used.
    atom_coords = [atom.get_coord() for atom in matching[0].get_atoms()]
    searcher = NeighborSearch(list(struct.get_atoms()))

    yield searcher.search(atom_coords[0], radius, search_level)
Пример #7
0
def compute_interactions(pdb_name, save_to_db=False):
    """Compute residue distances and classified interactions for a structure.

    The ``Structure`` record whose protein entry name equals the lower-cased
    ``pdb_name`` is loaded, its stored PDB text is parsed, and the first model
    is used. All work happens on the preferred chain (the first entry of the
    comma-separated ``preferred_chain`` field).

    Steps:
      1. Residues of that chain whose sequence number has no database residue
         with a generic number are detached, except residues named "HOH".
      2. The CA-CA distance is recorded for every unordered pair of residues
         for which ``is_water`` is false.
      3. Residue pairs from ``search_all(6.6, "R")`` are reduced to pairs where
         ``is_aa`` holds for both members and whose sequence numbers differ by
         more than ``NUM_SKIP_RESIDUES``. Each is wrapped in an
         ``InteractingPair``; those reporting at least one interaction form
         ``classified``.
      4. With ``save_to_db`` set: existing ``InteractingResiduePair`` rows of
         the structure are deleted, water-mediated interactions are attached,
         the classified pairs are saved and ``Distance`` rows are
         bulk-created.

    Args:
        pdb_name: Protein entry name of the structure (case-insensitive).
        save_to_db: When true, persist the results as described in step 4.

    Returns:
        Tuple ``(classified, distances)``. ``distances`` holds 5-tuples of
        (database residue 1, database residue 2, CA-CA distance,
        generic-number label 1, generic-number label 2).
    """

    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Index the database residues that have a generic number by sequence
    # number, then detach every chain residue that is not in that index.
    # Residues named "HOH" are kept regardless.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect first, detach afterwards: the chain is not modified while it is
    # being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # NOTE(review): raises KeyError if either residue has
                        # no 'CA' atom.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Keep only the pairs that report at least one interaction.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain id><residue number>_<chain id><residue number>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain the atoms of all "HOH" residues in the preferred chain
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list:
                ## (water atom, residue) for each residue hit within 3.5 of the
                ## water that passes the is_water / is_hba / is_hbd filters
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    # Only later entries that refer to the same water atom.
                    for index_two in [
                            index for index in range(index_one +
                                                     1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            # zip stops at the shorter of the two match lists.
                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a +
                                        "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            # NOTE(review): only the pairs in `classified` are saved. Pairs
            # that were newly created in `interaction_pairs` above (water
            # contact only) are not part of `classified` and are therefore
            # not saved here. Confirm this is intended.
            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            # Distances are stored as integers: int(100 * distance).
            bulk_distances = []
            for i, d in enumerate(distances):
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows have accumulated.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left over from the last partial batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
Пример #8
0
def compute_interactions(pdb_name, save_to_db=False):
    """Compute distances, intra-chain and complex interactions for a structure.

    The ``Structure`` record whose protein entry name equals the lower-cased
    ``pdb_name`` is loaded, its stored PDB text is parsed, and the first model
    is used. Unless stated otherwise, work happens on the preferred chain
    (the first entry of the comma-separated ``preferred_chain`` field).

    Steps:
      1. Residues of that chain whose sequence number has no database residue
         with a generic number are detached, except residues named "HOH".
      2. The CA-CA distance is recorded for every unordered pair of residues
         for which ``is_water`` is false.
      3. Residue pairs from ``search_all(6.6, "R")`` are reduced to pairs where
         ``is_aa`` holds for both members and whose sequence numbers differ by
         more than ``NUM_SKIP_RESIDUES``. Each is wrapped in an
         ``InteractingPair``; those reporting at least one interaction form
         ``classified``.
      4. If a ``SignprotComplex`` exists for the structure, residues of chain
         ``complex.alpha`` found by a 4.5-radius residue search around each
         preferred-chain amino-acid atom are paired with that atom's residue;
         pairs reporting at least one interaction form ``classified_complex``.
      5. With ``save_to_db`` set: existing ``InteractingResiduePair`` rows of
         the structure are deleted, water-mediated interactions are attached,
         both classified lists are saved and ``Distance`` rows are
         bulk-created.

    Args:
        pdb_name: Protein entry name of the structure (case-insensitive).
        save_to_db: When true, persist the results as described in step 5.

    Returns:
        Tuple ``(classified, distances)``. ``distances`` holds 5-tuples of
        (database residue 1, database residue 2, CA-CA distance,
        generic-number label 1, generic-number label 2). The complex
        interactions are not part of the return value.
    """

    do_distances = True
    do_interactions = True
    do_complexes = True
    distances = []
    classified = []
    classified_complex = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Index the database residues that have a generic number by sequence
    # number, then detach every chain residue that is not in that index.
    # Residues named "HOH" are kept regardless.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect first, detach afterwards: the chain is not modified while it is
    # being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # NOTE(review): raises KeyError if either residue has
                        # no 'CA' atom.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Keep only the pairs that report at least one interaction.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if do_complexes:
        try:
            # check if structure in signprot_complex
            # NOTE(review): the local name `complex` shadows the builtin.
            complex = SignprotComplex.objects.get(structure=struc)

            # Get all GPCR residue atoms based on preferred chain
            gpcr_atom_list = [ atom for residue in Selection.unfold_entities(s[preferred_chain], 'R') if is_aa(residue) \
                            for atom in residue.get_atoms()]

            # Get all residue atoms from the coupled protein (e.g. G-protein)
            # NOW: select alpha subunit protein chain using complex model
            sign_atom_list = [ atom for residue in Selection.unfold_entities(s[complex.alpha], 'R') if is_aa(residue) \
                                for atom in residue.get_atoms()]

            ns_gpcr = NeighborSearch(gpcr_atom_list)
            ns_sign = NeighborSearch(sign_atom_list)

            # For each GPCR atom perform the neighbor search on the signaling protein.
            # A set is used, so each (GPCR residue, partner residue) pair
            # appears once even when several atoms produce it.
            all_neighbors = {
                (gpcr_atom.parent, match_res)
                for gpcr_atom in gpcr_atom_list
                for match_res in ns_sign.search(gpcr_atom.coord, 4.5, "R")
            }

            # Load the database residues (with generic number) of the
            # signaling protein; its entry name is "<pdb_name>_<alpha chain>".
            residues_sign = ProteinConformation.objects.get(
                protein__entry_name=pdb_name + "_" +
                complex.alpha.lower()).residue_set.exclude(
                    generic_number=None).all().prefetch_related(
                        'generic_number')

            # grab labels from sign protein
            dbres_sign = {}
            dblabel_sign = {}
            for r in residues_sign:
                dbres_sign[r.sequence_number] = r
                dblabel_sign[r.sequence_number] = r.generic_number.label

            # Find interactions; pairs with a residue missing from either
            # database index are skipped.
            interactions = [
                InteractingPair(res_pair[0], res_pair[1],
                                dbres[res_pair[0].id[1]],
                                dbres_sign[res_pair[1].id[1]], struc)
                for res_pair in all_neighbors if res_pair[0].id[1] in dbres
                and res_pair[1].id[1] in dbres_sign
            ]

            # Filter unclassified interactions
            classified_complex = [
                interaction for interaction in interactions
                if len(interaction.get_interactions()) > 0
            ]

            # Convert to dictionary for water calculations
            # NOTE(review): this dictionary is only consumed by the
            # commented-out code below, and the name is rebuilt from scratch
            # in the save_to_db section.
            interaction_pairs = {}
            for pair in classified_complex:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # Obtain list of all water molecules in the structure
            water_list = {
                water
                for chain in s for residue in chain
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            # If waters are present calculate water-mediated interactions
            if len(water_list) > 0:
                ## Iterate water molecules over coupled and gpcr atom list
                # NOTE(review): both sets below are computed but not used by
                # any live code; their only consumer is commented out.
                water_neighbors_gpcr = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_gpcr.search(water.coord, 3.5, "R")
                }

                water_neighbors_sign = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_sign.search(water.coord, 3.5, "R")
                }

                # TODO: DEBUG AND VERIFY this code as water-mediated interactions were present at this time
                # 1. UPDATE complexes to include also mini Gs and peptides (e.g. 4X1H/6FUF/5G53)
                # 2. Run and verify water-mediated do_interactions
                # 3. Improve the intersection between the two hit lists

                ## TODO: cleaner intersection between hits from the two Lists
                # see new code below
#                for water_pair_one in water_neighbors_gpcr:
#                    for water_pair_two in water_neighbors_sign:
#                        if water_pair_one[0]==water_pair_two[0]:
#                            res_1 = water_pair_one[1]
#                            res_2 = water_pair_two[1]
#                            key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

# Check if interaction is polar
#                            if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
# TODO Check if water interaction is already present (e.g. multiple waters)
# TODO Is splitting of sidechain and backbone-mediated interactions desired?
#                                if not key in interaction_pairs:
#                                    interaction_pairs[key] = InteractingPair(res_1, res_2, dbres[res_1.id[1]], dbres_sign[res_2.id[1]], struc)

# TODO: fix assignment of interacting atom labels (now seems limited to residues)
#                                interaction_pairs[key].interactions.append(WaterMediated(a + "|" + str(water_pair_one[0].get_parent().get_id()[1]), b))

        except SignprotComplex.DoesNotExist:
            #            print("No complex definition found for", pdb_name)
            # NOTE(review): `log` is assigned but not read anywhere in this
            # function, so this case is effectively silent.
            log = "No complex definition found for " + pdb_name
        except ProteinConformation.DoesNotExist:
            print(
                "No protein conformation definition found for signaling protein of ",
                pdb_name)
#            log = "No protein conformation definition found for signaling protein of " + pdb_name

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain id><residue number>_<chain id><residue number>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain the atoms of all "HOH" residues in the preferred chain
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list:
                ## (water atom, residue) for each residue hit within 3.5 of the
                ## water that passes the is_water / is_hba / is_hbd filters
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    # Only later entries that refer to the same water atom.
                    for index_two in [
                            index for index in range(index_one +
                                                     1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            # zip stops at the shorter of the two match lists.
                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a +
                                        "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            # NOTE(review): only the pairs in `classified` are saved. Pairs
            # that were newly created in `interaction_pairs` above (water
            # contact only) are not part of `classified` and are therefore
            # not saved here. Confirm this is intended.
            for p in classified:
                p.save_into_database()

        if do_complexes:
            # Persist the receptor / signaling-protein pairs found above.
            for pair in classified_complex:
                pair.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            # Distances are stored as integers: int(100 * distance).
            bulk_distances = []
            for i, d in enumerate(distances):
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows have accumulated.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left over from the last partial batch.
            pairs = Distance.objects.bulk_create(bulk_distances)
    return classified, distances
Пример #9
0
    def find_ligands(self, radius=3):
        """
        Populate this site with the hetero components found around it.

        The parent structure is searched at residue level around every centre
        returned by ``_get_ligand_search_centers`` (per the original notes:
        atoms of the catalytic residues and the dummy atoms between distant
        residues). Waters are ignored, HET residues are added as Het objects,
        and standard residues on chains that do not belong to the site are
        grouped per chain and added as polymers when ``acts_on_polymer`` is
        set. A second, fixed 30-radius search around the site's geometric
        centre adds HET residues not added before whose type is a non-polymer
        substrate or a non-ion co-factor, flagged as distal.

        Nothing happens when the parent structure is not exactly a Structure.

        Args:
            radius: the search space (in Å) around the atoms of the catalytic residues
        """
        # Exact-type comparison kept on purpose: subclasses of Structure are
        # skipped as well.
        if type(self.parent_structure) != Structure:
            return

        def build_het(residue):
            # One Het per residue, labelled with the id of its parent chain.
            return Het(self.mcsa_id,
                       self.pdb_id,
                       residue.get_resname(),
                       residue.get_id()[1],
                       residue.get_parent().get_id(),
                       structure=residue,
                       parent_site=self)

        # Centres first, then the KD tree over every atom of the parent.
        search_centers = self._get_ligand_search_centers(radius)
        all_atoms = Bio.PDB.Selection.unfold_entities(
            self.parent_structure, 'A')
        ns = NeighborSearch(all_atoms)

        polymers = defaultdict(list)
        site_chains = {res.chain for res in self}
        seen, added = set(), set()

        # Components close to the catalytic residues.
        for center in search_centers:
            for res in ns.search(center, radius, level='R'):
                full_id = res.get_full_id()
                if full_id in seen:
                    continue
                seen.add(full_id)

                # First character of the residue's hetero flag.
                restype = res.get_id()[0][0]
                chain = res.get_parent().get_id()
                if restype == 'W':
                    # Waters are never ligands.
                    continue
                if restype == 'H':
                    self.add(build_het(res))
                    added.add(full_id)
                elif restype == ' ' and chain not in site_chains:
                    # Protein/nucleic residue on a chain outside the site.
                    polymers[chain].append(res)

        # Polymers are only materialised for sites that act on them.
        if self.acts_on_polymer:
            for chain, reslist in polymers.items():
                polymer = Het.polymer(reslist, self.mcsa_id, self.pdb_id,
                                      chain, self)
                self.add(polymer)

        # Distal co-factor-like or substrate-like molecules.
        site_center = self.structure.center_of_mass(geometric=True)
        for res in ns.search(site_center, 30, level='R'):
            if res.get_id()[0][0] != 'H' or res.get_full_id() in added:
                continue
            ligand = build_het(res)
            if ligand.type in ('Substrate (non-polymer)',
                               'Co-factor (non-ion)'):
                ligand.is_distal = True
                self.add(ligand)
Пример #10
0
def _residue_pair_key(res_a, res_b):
    """Return the '<chain><seqnum>_<chain><seqnum>' key for an ordered residue pair."""
    return (res_a.get_parent().get_id() + str(res_a.get_id()[1]) + "_"
            + res_b.get_parent().get_id() + str(res_b.get_id()[1]))


def compute_interactions(pdb_name, save_to_db=False):
    """Compute pairwise residue distances and interactions for one structure.

    The structure is looked up by entry name, its stored PDB text is parsed,
    and only the first preferred chain is analysed. Residues of that chain
    that have no generic number in the database are detached (waters named
    "HOH" are kept so water-mediated contacts can be detected).

    Args:
        pdb_name: Entry name of the structure; lower-cased before lookup.
        save_to_db: When true, previously stored interacting pairs for the
            structure are deleted and the newly computed pairs (including
            pairs that are connected only through a water) and all distances
            are written to the database. Existing ``Distance`` rows are not
            deleted first.

    Returns:
        A ``(classified, distances)`` tuple. ``classified`` is the list of
        ``InteractingPair`` objects with at least one direct interaction
        (pairs that are only water-mediated are persisted but not returned).
        ``distances`` is a list of
        ``(db_res1, db_res2, ca_distance, label1, label2)`` tuples.

    Raises:
        KeyError: If a non-water residue lacks a ``CA`` atom or is missing
            from the database lookup.
    """
    from itertools import combinations

    # Developer toggles for the two independent stages.
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the first model of the Biopython structure for the PDB
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    chain = s[preferred_chain]

    # Keep only residues that have a generic number in the database.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Collect first, detach afterwards: the chain must not be mutated while
    # it is being iterated.
    ids_to_remove = [res.id for res in chain
                     if res.id[1] not in dbres and res.get_resname() != "HOH"]
    for res_id in ids_to_remove:
        chain.detach_child(res_id)

    if do_distances:
        # Every unordered pair of non-water residues exactly once, in chain order.
        non_water = [res for res in chain if not is_water(res)]
        for res1, res2 in combinations(non_water, 2):
            num1, num2 = res1.id[1], res2.id[1]
            distance = res1['CA'] - res2['CA']
            distances.append((dbres[num1], dbres[num2], distance, dblabel[num1], dblabel[num2]))

    if do_interactions:
        atom_list = Selection.unfold_entities(chain, 'A')
        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_a, res_b, get_interactions(res_a, res_b),
                            dbres[res_a.id[1]], dbres[res_b.id[1]], struc)
            for res_a, res_b in all_aa_neighbors
        ]

        # Keep only the pairs for which at least one interaction was found.
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # Index the classified pairs by their residue-pair key.
            interaction_pairs = {}
            for pair in classified:
                key = _residue_pair_key(pair.get_residue_1(), pair.get_residue_2())
                interaction_pairs[key] = pair

            # Pairs that exist only because of a bridging water. They are not
            # part of ``classified`` and therefore have to be saved explicitly.
            water_only_pairs = []

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            water_atoms = {
                atom
                for residue in chain if residue.get_resname() == "HOH"
                for atom in residue.get_atoms()
            }
            for water in water_atoms:
                water_res = water.get_parent()
                # Non-water residues near this water atom that form a polar
                # contact with it - NOTE: this is not capturing every angle
                polar_partners = [
                    res for res in ns.search(water.coord, 3.5, "R")
                    if not is_water(res) and any(get_polar_interactions(water_res, res))
                ]

                # Every two residues sharing this water are water-mediated partners.
                # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                for res_1, res_2 in combinations(polar_partners, 2):
                    key = _residue_pair_key(res_1, res_2)
                    pair = interaction_pairs.get(key)
                    if pair is None:
                        # The same contact may be stored in the opposite orientation.
                        pair = interaction_pairs.get(_residue_pair_key(res_2, res_1))

                    if pair is None:
                        pair = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)
                        interaction_pairs[key] = pair
                        water_only_pairs.append(pair)
                    else:
                        pair.interactions.append(WaterMediated())

            for p in classified:
                p.save_into_database()
            for p in water_only_pairs:
                p.save_into_database()

        if do_distances:
            # Insert in chunks so the pending list never grows unbounded.
            bulk_distances = []
            for d in distances:
                # The distance is stored as an integer number of hundredths.
                bulk_distances.append(
                    Distance(distance=int(100*d[2]), res1=d[0], res2=d[1], gn1=d[3], gn2=d[4],
                             gns_pair='_'.join([d[3], d[4]]), structure=struc))
                if len(bulk_distances) > 1000:
                    Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            Distance.objects.bulk_create(bulk_distances)

    return classified, distances