def get_nearest_sse_residues(self, sse1, sse2):
    """
    Look up the pair of residues, one in each of the two supplied SSEs,
    that was recorded as the closest pair when the SSE distance matrix
    was built.

    Intended usage: the nearest SSEs are first found with
    get_min_distance_sse() (or another function that uses the SSE
    distance matrix); this function is then called, if required, to
    retrieve the particular residues that gave the minimum distance
    between those SSEs.

    Parameters:
        sse1 - PTNode for helix/strand 1
        sse2 - PTNode for helix/strand 2

    Return value:
        tuple (res_seq_num_1, res_seq_num_2) where res_seq_num_1 and
        res_seq_num_2 are the residue sequence numbers in sse1 and sse2
        respectively that have min distance to each other (of all
        residues in sse1 and sse2)

    Uses data members (readonly):
        sse_residue_map - dict of
            {(ptnode1, ptnode2) : (residue1, residue2)}
            built by calc_sse_dist_matrix(), which for every pair of
            SSEs gives the residue in each that is closest to the other
            (as used in the distance matrix). Both (ptnode1, ptnode2)
            and (ptnode2, ptnode1) are stored, with the residues swapped
            appropriately, so the argument order determines the order
            of the returned tuple.
    """
    nearest_in_sse1, nearest_in_sse2 = self.sse_residue_map[(sse1, sse2)]
    # The id of a residue in Bio.PDB is the tuple
    # (hetatm, resseqnum, icode); convert each to a PDB residue
    # sequence number.
    return (
        biopdbresid_to_pdbresseq(nearest_in_sse1.get_id()),
        biopdbresid_to_pdbresseq(nearest_in_sse2.get_id()),
    )
def get_nearest_sse_residues(self, sse1, sse2):
    """
    Look up the pair of residues, one in each of the two supplied SSEs,
    that was recorded as the closest pair when the SSE distance matrix
    was built.

    Intended usage: the nearest SSEs are first found with
    get_min_distance_sse() (or another function that uses the SSE
    distance matrix); this function is then called, if required, to
    retrieve the particular residues that gave the minimum distance
    between those SSEs.

    Parameters:
        sse1 - PTNode for helix/strand 1
        sse2 - PTNode for helix/strand 2

    Return value:
        tuple (res_seq_num_1, res_seq_num_2) where res_seq_num_1 and
        res_seq_num_2 are the residue sequence numbers in sse1 and sse2
        respectively that have min distance to each other (of all
        residues in sse1 and sse2)

    Uses data members (readonly):
        sse_residue_map - dict of
            {(ptnode1, ptnode2) : (residue1, residue2)}
            built by calc_sse_dist_matrix(), which for every pair of
            SSEs gives the residue in each that is closest to the other
            (as used in the distance matrix). Both (ptnode1, ptnode2)
            and (ptnode2, ptnode1) are stored, with the residues swapped
            appropriately, so the argument order determines the order
            of the returned tuple.
    """
    nearest_in_sse1, nearest_in_sse2 = self.sse_residue_map[(sse1, sse2)]
    # The id of a residue in Bio.PDB is the tuple
    # (hetatm, resseqnum, icode); convert each to a PDB residue
    # sequence number.
    return (
        biopdbresid_to_pdbresseq(nearest_in_sse1.get_id()),
        biopdbresid_to_pdbresseq(nearest_in_sse2.get_id()),
    )
def build_graph_from_secstruct(self, secstruct, domain, chainid=None,
                               ignore_insertion_codes=False):
    """
    Build the per-chain lists of helix and strand nodes from the
    supplied PTSecStruct object, then add the hydrogen bond and bridge
    edges and build the sequential-number-to-node dictionary.

    Parameters:
        secstruct - PTSecStruct (ptsecstruct.py) object to build from
        domain - PTDomain (ptdomain.py) object listing the segment(s)
                 that make up this domain (only one domain processed at
                 a time).
                 NOTE(review): earlier documentation describes this as
                 (in/out) and says it may be modified by having a
                 segment added if an SSE is only partly in the domain;
                 no such modification is made directly in this function
                 (it only calls domain.is_in_domain() and reads
                 domain.domainid) - confirm against callers.
        chainid - chain identifier to build graph for only this chain,
                  or None for all chains (default)
        ignore_insertion_codes - If True, a hack to make it work with
                  PMML (only) which does not report insertion codes
                  unlike DSSP and STRIDE. It is passed through to
                  biopdbresid_to_pdbresseq() when building the keys of
                  pdb_resid_dict.

    Uses member data:
        (write):
          secstruct - keeps a pointer to the supplied secstruct
          residue_list - list of residues obtained from
                         get_residue_list() for the whole structure
          pdb_resid_dict - dict of { (chainid, pdb_resid) : index in
                           residue_list }
          chain_dict - dict of { chainid : node_list } where node_list
                       is the sorted list of nodes, built in this
                       function
          seqnum2node - dict of { seqnum : PTNode }, numbered from 1
                        and not restarting for each chain
        (readonly):
          pdb_struct - The Bio.PDB parsed PDB struct (atomic
                       co-ordinates) for this protein.
          include_310_helices, include_pi_helices - if true, include
                       these kinds of helices.

    Raises exceptions:
        NoSSE_Exception if no helix or strand (on the selected chain,
        if chainid is given) has its first residue in the domain.
        Note helices are counted before the pi/310 inclusion flags are
        applied.

    Return value:
        None.
    """
    self.secstruct = secstruct

    helix_num = 1
    strand_num = 1
    num_helices_in_domain = 0
    num_strands_in_domain = 0

    #
    # Build dictionary mapping (chainid, pdb_resid) to index in
    # residue_list for ALL residues, not just those in this domain.
    #
    self.residue_list = self.get_residue_list(self.pdb_struct,
                                              PTDomain(None, None))
    self.pdb_resid_dict = {}
    for seq_indx, residue in enumerate(self.residue_list):
        resid_key = (
            ptsecstruct.pdb_chainid_to_stride_chainid(
                residue.get_full_id()[2]),
            biopdbresid_to_pdbresseq(residue.get_id(),
                                     ignore_insertion_codes),
        )
        self.pdb_resid_dict[resid_key] = seq_indx

    # Note that now we are only adding elements in the supplied domain,
    # so the so-called 'chains' may really be segments, i.e. subsequences
    # of chains (rest of chain may be in other domain(s)).
    self.chain_dict = {}  # dict of {chainid : node_list}

    for (start_chainid, start_resnum, end_chainid, end_resnum,
         helixtype) in secstruct.helix_list:
        assert start_chainid == end_chainid  # helix must be same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        # will consider structures in domain if first residue is in domain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_helices_in_domain += 1

        if helixtype == "H":
            idprefix, htype = "ALPHAHELIX_", "ALPHA"
        elif helixtype == "I":
            if not self.include_pi_helices:
                continue
            idprefix, htype = "PIHELIX_", "PI"
        elif helixtype == "G":
            if not self.include_310_helices:
                continue
            idprefix, htype = "310HELIX_", "310"
        else:  # shouldn't happen
            sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
            # Skip this helix: there is no valid type or id prefix for
            # it, so building a node here would either fail on unbound
            # names or silently reuse the previous helix's type and
            # number.
            continue

        ah_node = PTNodeHelix(htype,
                              idprefix + start_chainid + "_" +
                              str(helix_num),
                              helix_num,
                              start_resnum, end_resnum, start_chainid,
                              domain.domainid,
                              self.residue_list, self.pdb_resid_dict)
        helix_num += 1
        self.chain_dict.setdefault(start_chainid, []).append(ah_node)

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])

    for (start_chainid, start_resnum, end_chainid,
         end_resnum) in secstruct.strand_list:
        assert start_chainid == end_chainid  # must be in same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_strands_in_domain += 1

        bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                               str(strand_num),
                               strand_num,
                               start_resnum, end_resnum, start_chainid,
                               domain.domainid,
                               self.residue_list, self.pdb_resid_dict)
        strand_num += 1
        chain_nodes = self.chain_dict.setdefault(start_chainid, [])

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])
        chain_nodes.append(bs_node)

    # raise an exception if there are no SSEs at all in this domain
    if num_helices_in_domain == 0 and num_strands_in_domain == 0:
        raise NoSSE_Exception

    def _will_be_drawn(ptnode):
        # True for strands, alpha helices, and 310/pi helices whose
        # inclusion flag is set; False for any other node.
        if isinstance(ptnode, PTNodeStrand):
            return True
        if not isinstance(ptnode, PTNodeHelix):
            return False
        helix_kind = ptnode.get_type()
        return (helix_kind == "ALPHA" or
                (helix_kind == "310" and self.include_310_helices) or
                (helix_kind == "PI" and self.include_pi_helices))

    # Chains are only marked here and removed afterwards, so that
    # chain_dict is never modified while it is being iterated over.
    delete_chainid_list = []  # list of chainids to delete from chain_dict
    for (this_chainid, nodelist) in self.chain_dict.items():
        # sort in order of start residue id ascending (all must be disjoint)
        nodelist.sort()

        if not nodelist:
            # There are no SSEs in this chain, get rid of it.
            warning = ('WARNING: no SSEs in chain ' + this_chainid +
                       '; chain ignored\n')
        elif not any(_will_be_drawn(ptnode) for ptnode in nodelist):
            # Chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices): delete those too
            warning = ('WARNING: only pi or 310 helices in chain ' +
                       this_chainid + '; chain ignored\n')
        else:
            continue
        sys.stderr.write(warning)
        delete_chainid_list.append(this_chainid)

    # delete chains from chain_dict that were marked earlier for deletion
    for this_chainid in delete_chainid_list:
        del self.chain_dict[this_chainid]

    # -------------------------------------------------------------------
    # This is needed only for labelling sheets for HH and KK codes
    # (see dfs_strands() etc.)

    # add edges for hydrogen bonds
    # uses secstruct and chainid member data
    # these are used for determining which side bridge partners are
    # on (and also for drawing a hydrogen bond graph if requested)
    self.add_hbond_edges_from_secstruct()

    # add edges for bridge partners
    # uses secstruct and chainid member data
    self.add_bridge_edges_from_secstruct()

    # -------------------------------------------------------------------
    # for sequential numbering, we'll build this dictionary mapping
    # sequential number (note NOT restarting for each chain) to PTNode,
    # for the sequential numbers used by the ptgraph2 -b sequential
    # option.
    # this is a dictionary of { seqnum : PTNode }
    def _is_numbered(node):
        # Terminus nodes and helices of an excluded kind (310 or pi)
        # get no sequential number; every other node does.
        if isinstance(node, PTNodeTerminus):
            return False
        if isinstance(node, PTNodeHelix):
            helix_kind = node.get_type()
            if ((helix_kind == "310" and not self.include_310_helices) or
                    (helix_kind == "PI" and not self.include_pi_helices)):
                return False
        return True

    self.seqnum2node = {}
    numbered_nodes = [node for node in self.iter_nodes()
                      if _is_numbered(node)]
    for (seqnum, node) in enumerate(numbered_nodes):
        self.seqnum2node[seqnum + 1] = node  # start at 1 not 0
def build_graph_from_secstruct(self, secstruct, domain, chainid=None,
                               ignore_insertion_codes=False):
    """
    Build the per-chain lists of helix and strand nodes from the
    supplied PTSecStruct object, then add the hydrogen bond and bridge
    edges and build the sequential-number-to-node dictionary.

    Parameters:
        secstruct - PTSecStruct (ptsecstruct.py) object to build from
        domain - PTDomain (ptdomain.py) object listing the segment(s)
                 that make up this domain (only one domain processed at
                 a time).
                 NOTE(review): earlier documentation describes this as
                 (in/out) and says it may be modified by having a
                 segment added if an SSE is only partly in the domain;
                 no such modification is made directly in this function
                 (it only calls domain.is_in_domain() and reads
                 domain.domainid) - confirm against callers.
        chainid - chain identifier to build graph for only this chain,
                  or None for all chains (default)
        ignore_insertion_codes - If True, a hack to make it work with
                  PMML (only) which does not report insertion codes
                  unlike DSSP and STRIDE. It is passed through to
                  biopdbresid_to_pdbresseq() when building the keys of
                  pdb_resid_dict.

    Uses member data:
        (write):
          secstruct - keeps a pointer to the supplied secstruct
          residue_list - list of residues obtained from
                         get_residue_list() for the whole structure
          pdb_resid_dict - dict of { (chainid, pdb_resid) : index in
                           residue_list }
          chain_dict - dict of { chainid : node_list } where node_list
                       is the sorted list of nodes, built in this
                       function
          seqnum2node - dict of { seqnum : PTNode }, numbered from 1
                        and not restarting for each chain
        (readonly):
          pdb_struct - The Bio.PDB parsed PDB struct (atomic
                       co-ordinates) for this protein.
          include_310_helices, include_pi_helices - if true, include
                       these kinds of helices.

    Raises exceptions:
        NoSSE_Exception if no helix or strand (on the selected chain,
        if chainid is given) has its first residue in the domain.
        Note helices are counted before the pi/310 inclusion flags are
        applied.

    Return value:
        None.
    """
    self.secstruct = secstruct

    helix_num = 1
    strand_num = 1
    num_helices_in_domain = 0
    num_strands_in_domain = 0

    #
    # Build dictionary mapping (chainid, pdb_resid) to index in
    # residue_list for ALL residues, not just those in this domain.
    #
    self.residue_list = self.get_residue_list(self.pdb_struct,
                                              PTDomain(None, None))
    self.pdb_resid_dict = {}
    for seq_indx, residue in enumerate(self.residue_list):
        resid_key = (
            ptsecstruct.pdb_chainid_to_stride_chainid(
                residue.get_full_id()[2]),
            biopdbresid_to_pdbresseq(residue.get_id(),
                                     ignore_insertion_codes),
        )
        self.pdb_resid_dict[resid_key] = seq_indx

    # Note that now we are only adding elements in the supplied domain,
    # so the so-called 'chains' may really be segments, i.e. subsequences
    # of chains (rest of chain may be in other domain(s)).
    self.chain_dict = {}  # dict of {chainid : node_list}

    for (start_chainid, start_resnum, end_chainid, end_resnum,
         helixtype) in secstruct.helix_list:
        assert start_chainid == end_chainid  # helix must be same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        # will consider structures in domain if first residue is in domain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_helices_in_domain += 1

        if helixtype == "H":
            idprefix, htype = "ALPHAHELIX_", "ALPHA"
        elif helixtype == "I":
            if not self.include_pi_helices:
                continue
            idprefix, htype = "PIHELIX_", "PI"
        elif helixtype == "G":
            if not self.include_310_helices:
                continue
            idprefix, htype = "310HELIX_", "310"
        else:  # shouldn't happen
            sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
            # Skip this helix: there is no valid type or id prefix for
            # it, so building a node here would either fail on unbound
            # names or silently reuse the previous helix's type and
            # number.
            continue

        ah_node = PTNodeHelix(htype,
                              idprefix + start_chainid + "_" +
                              str(helix_num),
                              helix_num,
                              start_resnum, end_resnum, start_chainid,
                              domain.domainid,
                              self.residue_list, self.pdb_resid_dict)
        helix_num += 1
        self.chain_dict.setdefault(start_chainid, []).append(ah_node)

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])

    for (start_chainid, start_resnum, end_chainid,
         end_resnum) in secstruct.strand_list:
        assert start_chainid == end_chainid  # must be in same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_strands_in_domain += 1

        bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                               str(strand_num),
                               strand_num,
                               start_resnum, end_resnum, start_chainid,
                               domain.domainid,
                               self.residue_list, self.pdb_resid_dict)
        strand_num += 1
        chain_nodes = self.chain_dict.setdefault(start_chainid, [])

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])
        chain_nodes.append(bs_node)

    # raise an exception if there are no SSEs at all in this domain
    if num_helices_in_domain == 0 and num_strands_in_domain == 0:
        raise NoSSE_Exception

    def _will_be_drawn(ptnode):
        # True for strands, alpha helices, and 310/pi helices whose
        # inclusion flag is set; False for any other node.
        if isinstance(ptnode, PTNodeStrand):
            return True
        if not isinstance(ptnode, PTNodeHelix):
            return False
        helix_kind = ptnode.get_type()
        return (helix_kind == "ALPHA" or
                (helix_kind == "310" and self.include_310_helices) or
                (helix_kind == "PI" and self.include_pi_helices))

    # Chains are only marked here and removed afterwards, so that
    # chain_dict is never modified while it is being iterated over.
    delete_chainid_list = []  # list of chainids to delete from chain_dict
    for (this_chainid, nodelist) in self.chain_dict.items():
        # sort in order of start residue id ascending (all must be disjoint)
        nodelist.sort()

        if not nodelist:
            # There are no SSEs in this chain, get rid of it.
            warning = ('WARNING: no SSEs in chain ' + this_chainid +
                       '; chain ignored\n')
        elif not any(_will_be_drawn(ptnode) for ptnode in nodelist):
            # Chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices): delete those too
            warning = ('WARNING: only pi or 310 helices in chain ' +
                       this_chainid + '; chain ignored\n')
        else:
            continue
        sys.stderr.write(warning)
        delete_chainid_list.append(this_chainid)

    # delete chains from chain_dict that were marked earlier for deletion
    for this_chainid in delete_chainid_list:
        del self.chain_dict[this_chainid]

    # -------------------------------------------------------------------
    # This is needed only for labelling sheets for HH and KK codes
    # (see dfs_strands() etc.)

    # add edges for hydrogen bonds
    # uses secstruct and chainid member data
    # these are used for determining which side bridge partners are
    # on (and also for drawing a hydrogen bond graph if requested)
    self.add_hbond_edges_from_secstruct()

    # add edges for bridge partners
    # uses secstruct and chainid member data
    self.add_bridge_edges_from_secstruct()

    # -------------------------------------------------------------------
    # for sequential numbering, we'll build this dictionary mapping
    # sequential number (note NOT restarting for each chain) to PTNode,
    # for the sequential numbers used by the ptgraph2 -b sequential
    # option.
    # this is a dictionary of { seqnum : PTNode }
    def _is_numbered(node):
        # Terminus nodes and helices of an excluded kind (310 or pi)
        # get no sequential number; every other node does.
        if isinstance(node, PTNodeTerminus):
            return False
        if isinstance(node, PTNodeHelix):
            helix_kind = node.get_type()
            if ((helix_kind == "310" and not self.include_310_helices) or
                    (helix_kind == "PI" and not self.include_pi_helices)):
                return False
        return True

    self.seqnum2node = {}
    numbered_nodes = [node for node in self.iter_nodes()
                      if _is_numbered(node)]
    for (seqnum, node) in enumerate(numbered_nodes):
        self.seqnum2node[seqnum + 1] = node  # start at 1 not 0