Пример #1
0
    def get_residue_list(self, pdb_struct, domain, getchainid=None):
        """
        Return list of Bio.PDB Residue objects in this domain, and optionally
        in the specified chain.

        Parameters:
             pdb_struct - Bio.PDB parsed PDB struct for the protein. If None,
                          the self.pdb_struct member is used instead.
             domain -  PTDomain (ptdomain.py) object listing the segment(s)
                         that make up this domain (only one domain processed at a
                         time).
             getchainid - chain identifier to get residues in (default None -
                       all chains).

        Return value:
             list of Bio.PDB Residue objects in the domain (and optionally chain).
             Only residues for which is_aa(residue) is true are included.
        Raises exceptions:
           NoSSE_Exception for empty structure, i.e. when looking up model 0
           raises KeyError (happens eg on d1oayi_.ent)

        """
        # Honour the pdb_struct argument (it used to be silently ignored in
        # favour of the member); None keeps the old behaviour.
        if pdb_struct is None:
            pdb_struct = self.pdb_struct

        try:
            pdb_model = pdb_struct[0]  # TODO always using model 0 for now
        except KeyError:
            raise NoSSE_Exception

        residue_list = []
        for chain in pdb_model:
            chainid = ptsecstruct.pdb_chainid_to_stride_chainid(chain.get_id())
            if getchainid and getchainid != chainid:
                continue  # this is not the chain we want

            # Build a list of Bio.PDB Residue objects that are in this
            # domain.
            # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode)
            # so we choose those where residue PDB number
            # (in the current chain) is in the domain.
            # TODO: maybe should use polypeptide builder for this instead
            # (and indeed should probably use it right from the beginning) -
            residue_list.extend(
                residue for residue in chain.get_unpacked_list()
                if is_aa(residue)
                and domain.is_in_domain(chainid, residue.get_id()[1])
            )
            if getchainid:
                break  # if getchainid specified, we now have it so can quit
        return residue_list
    def get_residue_list(self, pdb_struct, domain, getchainid=None):
        """
        Return list of Bio.PDB Residue objects in this domain, and optionally
        in the specified chain.

        Parameters:
             pdb_struct - Bio.PDB parsed PDB struct for the protein. When it
                          is None the self.pdb_struct member is used.
             domain -  PTDomain (ptdomain.py) object listing the segment(s)
                         that make up this domain (only one domain processed at a
                         time).
             getchainid - chain identifier to get residues in (default None -
                       all chains).

        Return value:
             list of Bio.PDB Residue objects in the domain (and optionally chain).
             Residues failing is_aa(residue) are left out.
        Raises exceptions:
           NoSSE_Exception for empty structure, i.e. when model 0 cannot be
           looked up (KeyError) (happens eg on d1oayi_.ent)

        """
        # The pdb_struct argument was previously ignored and the member was
        # always read; use the argument, keeping the member as the fallback.
        struct = self.pdb_struct if pdb_struct is None else pdb_struct

        residue_list = []
        try:
            pdb_model = struct[0]  # TODO always using model 0 for now
        except KeyError:
            raise NoSSE_Exception

        for chain in pdb_model:
            chainid = ptsecstruct.pdb_chainid_to_stride_chainid(chain.get_id())
            if getchainid and getchainid != chainid:
                continue  # this is not the chain we want

            # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode)
            # so we keep those whose PDB residue number (in the current
            # chain) is in the domain.
            # TODO: maybe should use polypeptide builder for this instead
            # (and indeed should probably use it right from the beginning) -
            residue_list += [
                residue
                for residue in chain.get_unpacked_list()
                if is_aa(residue) and domain.is_in_domain(chainid, residue.get_id()[1])
            ]
            if getchainid:
                break  # if getchainid specified, we now have it so can quit
        return residue_list
Пример #3
0
 def accept_residue(self, residue):
     """
     Override of the base accept_residue() that accepts only residues
     lying in our domain and rejects HETATMs.

     Parameters:
        residue - Bio.PDB Residue object of residue to test
     Return value:
        1 to accept residue, 0 to reject.
     """
     stride_chainid = pdb_chainid_to_stride_chainid(
         residue.get_parent().get_id())
     # A Bio.PDB residue id is the tuple (hetatm, resseqnum, icode);
     # element 1 is the PDB residue sequence number.
     seqnum = residue.get_id()[1]
     if not self.domain.is_in_domain(stride_chainid, seqnum):
         return 0
     # The hetatm field is a single blank for non-HETATM residues.
     if residue.get_id()[0] == ' ':
         return 1
     return 0
Пример #4
0
 def accept_residue(self, residue):
     """
     Accept a residue only when it belongs to our domain and is not a
     HETATM (overrides the base accept_residue()).

     Parameters:
        residue - Bio.PDB Residue object of residue to test
     Return value:
        1 to accept residue, 0 to reject.
     """
     parent_chain = residue.get_parent()
     domain_chainid = pdb_chainid_to_stride_chainid(parent_chain.get_id())
     # Bio.PDB residue ids are (hetatm, resseqnum, icode) tuples: the
     # sequence number selects by domain, the hetatm flag must be blank.
     pdb_resnum = residue.get_id()[1]
     return 1 if (self.domain.is_in_domain(domain_chainid, pdb_resnum)
                  and residue.get_id()[0] == ' ') else 0
Пример #5
0
    def build_graph_from_secstruct(self,
                                   secstruct,
                                   domain,
                                   chainid=None,
                                   ignore_insertion_codes=False):
        """
        Build the per-chain lists of SSE nodes from the supplied
        PTSecStruct object.

        Parameters:
            secstruct - PTSecStruct (ptsecstruct.py) object to build from
            domain - PTDomain (ptdomain.py) object listing the segment(s)
                     that make up this domain (only one domain processed at a
                     time). In this function it is only queried with
                     is_in_domain() and its domainid is read.
            chainid - chain identifier to build graph for only this chain,
                      or None for all chains (default)
            ignore_insertion_codes - If True, a hack to make it work with
                      PMML (only) which does not report insertion codes
                      unlike DSSP and STRIDE

        Uses member data (write):
            chain_dict - dict of { chainid : node_list } where node_list is
                          list of nodes in order, built in this function
            secstruct - keeps a pointer to the supplied secstruct
            residue_list - result of get_residue_list() for
                          PTDomain(None, None)
            pdb_resid_dict - dict of { (chainid, pdb_resid) : index in
                          residue_list }
            seqnum2node - dict of { seqnum : PTNode }, numbered from 1

          (readonly):
            pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                         for this protein.
            include_310_helices, include_pi_helices - if true, include
                         these kinds of helices.

        Raises exceptions:
           NoSSE_Exception if no helix or strand starts in the domain
           (pi/310 helices that are skipped still count as found)

        Return value:
            None.

        """

        self.secstruct = secstruct

        helix_num = 1
        strand_num = 1

        num_helices_in_domain = 0
        num_strands_in_domain = 0

        #
        # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
        # for ALL residues, not just those in this domain.
        #
        self.residue_list = self.get_residue_list(self.pdb_struct,
                                                  PTDomain(None, None))
        self.pdb_resid_dict = {}
        for seq_indx, residue in enumerate(self.residue_list):
            resid_key = (
                ptsecstruct.pdb_chainid_to_stride_chainid(
                    residue.get_full_id()[2]),
                biopdbresid_to_pdbresseq(residue.get_id(),
                                         ignore_insertion_codes))
            self.pdb_resid_dict[resid_key] = seq_indx

        # Note that now we are only adding elements in the supplied domain,
        # so the so-called 'chains' may really be segments, i.e. subsequences
        # of chains (rest of chain may be in other domain(s)

        self.chain_dict = {}  # dict of {chainid : node_list}

        for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) \
              in secstruct.helix_list:
            assert (start_chainid == end_chainid)  #helix must be same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            # will consider structures in domain if first residue is in domain
            if not domain.is_in_domain(start_chainid,
                                       get_int_icode(start_resnum)[0]):
                continue
            num_helices_in_domain += 1
            if helixtype == "H":
                idprefix = "ALPHAHELIX_"
                htype = "ALPHA"
            elif helixtype == "I":
                if not self.include_pi_helices:
                    continue
                idprefix = "PIHELIX_"
                htype = "PI"
            elif helixtype == "G":
                if not self.include_310_helices:
                    continue
                idprefix = "310HELIX_"
                htype = "310"
            else:  # shouldn't happen
                sys.stderr.write("ERROR: bad helix type " + helixtype +
                                 "\n")
                # Skip it: falling through would build a node from
                # undefined (NameError) or stale htype/idprefix values.
                continue
            this_helix_num = helix_num
            helix_num += 1
            ah_node = PTNodeHelix(htype,
                                  idprefix + start_chainid + "_" +
                                  str(this_helix_num),
                                  this_helix_num,
                                  start_resnum, end_resnum, start_chainid,
                                  domain.domainid,
                                  self.residue_list, self.pdb_resid_dict)
            # setdefault works on both Python 2 and 3 (has_key is 2 only)
            self.chain_dict.setdefault(start_chainid, []).append(ah_node)

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert (domain.is_in_domain(end_chainid,
                                        get_int_icode(end_resnum)[0]))

        for (start_chainid, start_resnum, end_chainid, end_resnum) \
                in secstruct.strand_list:
            assert (start_chainid == end_chainid)  # must be in same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            if not domain.is_in_domain(start_chainid,
                                       get_int_icode(start_resnum)[0]):
                continue
            num_strands_in_domain += 1
            bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                                   str(strand_num),
                                   strand_num,
                                   start_resnum, end_resnum, start_chainid,
                                   domain.domainid,
                                   self.residue_list,
                                   self.pdb_resid_dict)
            strand_num += 1
            chain_nodes = self.chain_dict.setdefault(start_chainid, [])

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert (domain.is_in_domain(end_chainid,
                                        get_int_icode(end_resnum)[0]))
            chain_nodes.append(bs_node)

        # raise an exception if there are no SSEs at all in this domain
        if num_helices_in_domain == 0 and num_strands_in_domain == 0:
            raise NoSSE_Exception

        delete_chainid_list = []  # list of chainids to delete from chain_dict
        # (loop variable deliberately not called chainid, so the chainid
        # parameter is not clobbered)
        for (node_chainid, nodelist) in self.chain_dict.items():
            # sort in order of start residue id ascending (all must be disjoint)
            nodelist.sort()

            if len(nodelist) < 1:
                # There are no SSEs in this chain, get rid of it.
                sys.stderr.write('WARNING: no SSEs in chain ' + node_chainid +
                                 '; chain ignored\n')
                # don't delete while in loop
                delete_chainid_list.append(node_chainid)
                continue

            # Check for chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices), and delete those too
            found_useful_node = False
            for ptnode in nodelist:
                if isinstance(ptnode, PTNodeStrand):
                    found_useful_node = True
                    break
                elif isinstance(ptnode, PTNodeHelix):
                    if ptnode.get_type() == "ALPHA":
                        found_useful_node = True
                        break
                    elif ((ptnode.get_type() == "310"
                           and self.include_310_helices)
                          or (ptnode.get_type() == "PI"
                              and self.include_pi_helices)):
                        found_useful_node = True
                        break
            if not found_useful_node:
                sys.stderr.write(
                    'WARNING: only pi or 310 helices in chain ' +
                    node_chainid + '; chain ignored\n')
                delete_chainid_list.append(node_chainid)

        # delete chains from chain_dict that were marked earlier for deletion
        for node_chainid in delete_chainid_list:
            self.chain_dict.pop(node_chainid)

        # -------------------------------------------------------------------

        # This is needed only for labelling sheets for HH and KK codes
        # (see dfs_strands() etc. below)

        # add edges for hydrogen bonds
        # uses secstruct and chainid member data
        # these are used for determining which side bridge partners are
        # on (and also for drawing a hydrogen bond graph if requested)
        self.add_hbond_edges_from_secstruct()

        # add edges for bridge partners
        # uses secstruct and chainid member data
        self.add_bridge_edges_from_secstruct()

        #---------------------------------------------------------------------

        # for sequential numbering, we'll build this dictionary mapping
        # sequential number (note NOT restarting for each chain)
        # to PTNode
        # so that sequential numbers as used in ptgraph2 -b sequential
        # option.
        # this is a dictionary of { seqnum : PTNode }
        self.seqnum2node = {}
        for (seqnum, node) in \
            enumerate([node for node in self.iter_nodes() if \
                       not ( (isinstance(node, PTNodeTerminus)) or
                              (isinstance(node, PTNodeHelix) and
                               ( (node.get_type() == "310" and
                                  not self.include_310_helices) or
                                 (node.get_type() == "PI" and
                                  not self.include_pi_helices) ) ) ) ]):
            self.seqnum2node[seqnum + 1] = node  # start at 1 not 0
    def build_graph_from_secstruct(self, secstruct, domain, chainid=None, ignore_insertion_codes=False):
        """
        Build the per-chain lists of SSE nodes from the supplied
        PTSecStruct object.

        Parameters:
            secstruct - PTSecStruct (ptsecstruct.py) object to build from
            domain - PTDomain (ptdomain.py) object listing the segment(s)
                     that make up this domain (only one domain processed at a
                     time). Here it is only queried through is_in_domain()
                     and its domainid attribute is read.
            chainid - chain identifier to build graph for only this chain,
                      or None for all chains (default)
            ignore_insertion_codes - If True, a hack to make it work with
                      PMML (only) which does not report insertion codes
                      unlike DSSP and STRIDE

        Uses member data (write):
            chain_dict - dict of { chainid : node_list } where node_list is
                          list of nodes in order, built in this function
            secstruct - keeps a pointer to the supplied secstruct
            residue_list - what get_residue_list() returns for
                          PTDomain(None, None)
            pdb_resid_dict - dict of { (chainid, pdb_resid) : index in
                          residue_list }
            seqnum2node - dict of { seqnum : PTNode }, numbering starts at 1

          (readonly):
            pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                         for this protein.
            include_310_helices, include_pi_helices - if true, include
                         these kinds of helices.

        Raises exceptions:
           NoSSE_Exception if no helix or strand starts in the domain
           (skipped pi/310 helices are still counted as found)

        Return value:
            None.

        """

        self.secstruct = secstruct

        helix_num = 1
        strand_num = 1

        num_helices_in_domain = 0
        num_strands_in_domain = 0

        #
        # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
        # for ALL residues, not just those in this domain.
        #
        self.residue_list = self.get_residue_list(self.pdb_struct, PTDomain(None, None))
        self.pdb_resid_dict = {}
        for seq_indx, residue in enumerate(self.residue_list):
            stride_chainid = ptsecstruct.pdb_chainid_to_stride_chainid(residue.get_full_id()[2])
            pdb_resseq = biopdbresid_to_pdbresseq(residue.get_id(), ignore_insertion_codes)
            self.pdb_resid_dict[(stride_chainid, pdb_resseq)] = seq_indx

        # Note that now we are only adding elements in the supplied domain,
        # so the so-called 'chains' may really be segments, i.e. subsequences
        # of chains (rest of chain may be in other domain(s)

        self.chain_dict = {}  # dict of {chainid : node_list}

        for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) in secstruct.helix_list:
            assert start_chainid == end_chainid  # helix must be same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            # will consider structures in domain if first residue is in domain
            if domain.is_in_domain(start_chainid, get_int_icode(start_resnum)[0]):
                num_helices_in_domain += 1
                if helixtype == "H":
                    idprefix = "ALPHAHELIX_"
                    htype = "ALPHA"
                elif helixtype == "I":
                    if not self.include_pi_helices:
                        continue
                    idprefix = "PIHELIX_"
                    htype = "PI"
                elif helixtype == "G":
                    if not self.include_310_helices:
                        continue
                    idprefix = "310HELIX_"
                    htype = "310"
                else:  # shouldn't happen
                    sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
                    # Do not fall through: htype/idprefix would be undefined
                    # (NameError) or left over from the previous helix.
                    continue
                this_helix_num = helix_num
                helix_num += 1
                ah_node = PTNodeHelix(
                    htype,
                    idprefix + start_chainid + "_" + str(this_helix_num),
                    this_helix_num,
                    start_resnum,
                    end_resnum,
                    start_chainid,
                    domain.domainid,
                    self.residue_list,
                    self.pdb_resid_dict,
                )
                # dict.has_key() exists only on Python 2; setdefault is portable
                self.chain_dict.setdefault(start_chainid, []).append(ah_node)

                # we must already have handled the case of SSEs that cross
                # domain boundaries (by moving whole SSE to one of the domains)
                assert domain.is_in_domain(end_chainid, get_int_icode(end_resnum)[0])

        for (start_chainid, start_resnum, end_chainid, end_resnum) in secstruct.strand_list:
            assert start_chainid == end_chainid  # must be in same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            if domain.is_in_domain(start_chainid, get_int_icode(start_resnum)[0]):
                num_strands_in_domain += 1
                bs_node = PTNodeStrand(
                    "STRAND_" + start_chainid + "_" + str(strand_num),
                    strand_num,
                    start_resnum,
                    end_resnum,
                    start_chainid,
                    domain.domainid,
                    self.residue_list,
                    self.pdb_resid_dict,
                )
                strand_num += 1
                strand_nodes = self.chain_dict.setdefault(start_chainid, [])

                # we must already have handled the case of SSEs that cross
                # domain boundaries (by moving whole SSE to one of the domains)
                assert domain.is_in_domain(end_chainid, get_int_icode(end_resnum)[0])
                strand_nodes.append(bs_node)

        # raise an exception if there are no SSEs at all in this domain
        if num_helices_in_domain == 0 and num_strands_in_domain == 0:
            raise NoSSE_Exception

        delete_chainid_list = []  # list of chainids to delete from chain_dict
        # The loop variable is not named chainid so that the chainid
        # parameter keeps its value.
        for (this_chainid, nodelist) in self.chain_dict.items():
            # sort in order of start residue id ascending (all must be disjoint)
            nodelist.sort()

            if len(nodelist) < 1:
                # There are no SSEs in this chain, get rid of it.
                sys.stderr.write("WARNING: no SSEs in chain " + this_chainid + "; chain ignored\n")
                delete_chainid_list.append(this_chainid)  # don't delete while in loop
                continue

            # Check for chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices), and delete those too
            found_useful_node = False
            for ptnode in nodelist:
                if isinstance(ptnode, PTNodeStrand):
                    found_useful_node = True
                    break
                elif isinstance(ptnode, PTNodeHelix):
                    if ptnode.get_type() == "ALPHA":
                        found_useful_node = True
                        break
                    elif (ptnode.get_type() == "310" and self.include_310_helices) or (
                        ptnode.get_type() == "PI" and self.include_pi_helices
                    ):
                        found_useful_node = True
                        break
            if not found_useful_node:
                sys.stderr.write("WARNING: only pi or 310 helices in chain " + this_chainid + "; chain ignored\n")
                delete_chainid_list.append(this_chainid)

        # delete chains from chain_dict that were marked earlier for deletion
        for this_chainid in delete_chainid_list:
            self.chain_dict.pop(this_chainid)

        # -------------------------------------------------------------------

        # This is needed only for labelling sheets for HH and KK codes
        # (see dfs_strands() etc. below)

        # add edges for hydrogen bonds
        # uses secstruct and chainid member data
        # these are used for determining which side bridge partners are
        # on (and also for drawing a hydrogen bond graph if requested)
        self.add_hbond_edges_from_secstruct()

        # add edges for bridge partners
        # uses secstruct and chainid member data
        self.add_bridge_edges_from_secstruct()

        # ---------------------------------------------------------------------

        # for sequential numbering, we'll build this dictionary mapping
        # sequential number (note NOT restarting for each chain)
        # to PTNode
        # so that sequential numbers as used in ptgraph2 -b sequential
        # option.
        # this is a dictionary of { seqnum : PTNode }
        self.seqnum2node = {}
        for (seqnum, node) in enumerate(
            [
                node
                for node in self.iter_nodes()
                if not (
                    (isinstance(node, PTNodeTerminus))
                    or (
                        isinstance(node, PTNodeHelix)
                        and (
                            (node.get_type() == "310" and not self.include_310_helices)
                            or (node.get_type() == "PI" and not self.include_pi_helices)
                        )
                    )
                )
            ]
        ):
            self.seqnum2node[seqnum + 1] = node  # start at 1 not 0