def tuplecmp(tup1, tup2):
    """
    cmp-style ordering for (chain, pdb_seqres_strt, pdb_seqres_end, type)
    tuples, used when sorting in PTSecStruct.check_validity() and
    PTSecStruct.__str__().

    The chain identifier (element 0) is compared first; only when both
    chains are equal is the start residue (element 1) compared, by
    delegating to pdb_res_seq_cmp().

    Return value:
        -1 if tup1 sorts before tup2, 1 if it sorts after, otherwise
        whatever pdb_res_seq_cmp() returns for the two start residues.
    """
    left_chain = tup1[0]
    right_chain = tup2[0]
    if left_chain < right_chain:
        return -1
    if left_chain > right_chain:
        return 1
    # Same chain: order by start residue sequence identifier.
    return pdb_res_seq_cmp(tup1[1], tup2[1])
def check_validity_and_fix(self):
    """
    Check for overlapping secondary structure elements (SSEs) and repair
    them where possible.

    Overlaps occur for example in the PDB HELIX records for 1DLC. When
    two consecutive SSEs on the same chain overlap, the longer of the
    two is shortened so that it no longer covers the residues of the
    other one.

    Parameters:
        None

    Return value:
        True if there were no overlaps or every overlap found was fixed;
        False if an overlap was left in place because the residue
        identifiers at the overlap carry insertion codes.

    Uses data members (READ/WRITE):
        helix_list, strand_list
            (start and end in helix and strand tuples may be modified;
            both lists are rebuilt in the order given by tuplecmp())
    """
    # Imported locally so this method does not depend on a module-level
    # import. cmp_to_key lets the cmp-style tuplecmp() be used as a sort
    # key, which works on Python 2.7 as well as Python 3 (where
    # list.sort() no longer accepts cmp=).
    from functools import cmp_to_key

    # Bring helices and strands into one common layout:
    # (chain, start, end, endchain, ssetype, htype)
    helices = [(chain, start, end, endchain, 'H', htype)
               for (chain, start, endchain, end, htype) in self.helix_list]
    strands = [(chain, start, end, endchain, 'E', None)
               for (chain, start, endchain, end) in self.strand_list]
    sselist = helices + strands
    sselist.sort(key=cmp_to_key(tuplecmp))

    is_valid = True
    for i in range(1, len(sselist)):
        sse = sselist[i]
        prevsse = sselist[i - 1]
        # Only neighbours on the same chain can overlap; they do when
        # this SSE starts at or before the end of the previous one.
        if not (prevsse[0] == sse[0]
                and pdb_res_seq_cmp(sse[1], prevsse[2]) <= 0):
            continue

        sys.stderr.write('WARNING: PDB has overlapping SSE definitions'
                         ' ' + str(prevsse) + ' and ' + str(sse) + ': ')
        # remove overlap by shortening the longer one
        # FIXME: this is ignoring insertion codes etc., really
        # should convert to proper sequential residue sequence numbers
        # to do this
        # get_int_icode() is presumed to split a residue identifier into
        # its integer number and insertion code -- confirm against helper.
        (prevsse_start, prevsse_start_icode) = get_int_icode(prevsse[1])
        (prevsse_end, prevsse_end_icode) = get_int_icode(prevsse[2])
        (sse_start, sse_start_icode) = get_int_icode(sse[1])
        (sse_end, sse_end_icode) = get_int_icode(sse[2])
        if prevsse_end_icode or sse_start_icode:
            # Integer arithmetic on the boundary is not meaningful when
            # an insertion code is present, so leave this pair untouched.
            sys.stderr.write('contains insertion codes, giving up\n')
            is_valid = False
            continue

        prevsse_len = prevsse_end - prevsse_start + 1
        sse_len = sse_end - sse_start + 1
        overlap = prevsse_end - sse_start + 1
        # NOTE(review): if both SSEs start at the same residue number,
        # the else branch leaves prevsse with end == start - 1 (an empty
        # range); that case is not handled specially here.
        if sse_len > prevsse_len:
            sse_start += overlap
        else:
            prevsse_end -= overlap
        sselist[i] = (sse[0], str(sse_start), str(sse_end),
                      sse[3], sse[4], sse[5])
        sselist[i - 1] = (prevsse[0], str(prevsse_start), str(prevsse_end),
                          prevsse[3], prevsse[4], prevsse[5])
        sys.stderr.write('changed to ' + str(sselist[i - 1]) + ' and ' +
                         str(sselist[i]) + '\n')

    # rebuild the helix_list and strand_list with our modified tuples
    self.helix_list = [(chain, start, endchain, end, htype)
                       for (chain, start, end, endchain, ssetype, htype)
                       in sselist if ssetype == 'H']
    self.strand_list = [(chain, start, endchain, end)
                        for (chain, start, end, endchain, ssetype, htype)
                        in sselist if ssetype == 'E']
    return is_valid
def check_validity_and_fix(self):
    """
    Check for overlapping secondary structure elements (SSEs) and repair
    them where possible.

    Overlaps occur for example in the PDB HELIX records for 1DLC. When
    two consecutive SSEs on the same chain overlap, the longer of the
    two is shortened so that it no longer covers the residues of the
    other one.

    Parameters:
        None

    Return value:
        True if there were no overlaps or every overlap found was fixed;
        False if an overlap was left in place because the residue
        identifiers at the overlap carry insertion codes.

    Uses data members (READ/WRITE):
        helix_list, strand_list
            (start and end in helix and strand tuples may be modified;
            both lists are rebuilt in the order given by tuplecmp())
    """
    # Imported locally so this method does not depend on a module-level
    # import. cmp_to_key lets the cmp-style tuplecmp() be used as a sort
    # key, which works on Python 2.7 as well as Python 3 (where
    # list.sort() no longer accepts cmp=).
    from functools import cmp_to_key

    # Bring helices and strands into one common layout:
    # (chain, start, end, endchain, ssetype, htype)
    helices = [(chain, start, end, endchain, 'H', htype)
               for (chain, start, endchain, end, htype) in self.helix_list]
    strands = [(chain, start, end, endchain, 'E', None)
               for (chain, start, endchain, end) in self.strand_list]
    sselist = helices + strands
    sselist.sort(key=cmp_to_key(tuplecmp))

    is_valid = True
    for i in range(1, len(sselist)):
        sse = sselist[i]
        prevsse = sselist[i - 1]
        # Only neighbours on the same chain can overlap; they do when
        # this SSE starts at or before the end of the previous one.
        if not (prevsse[0] == sse[0]
                and pdb_res_seq_cmp(sse[1], prevsse[2]) <= 0):
            continue

        sys.stderr.write('WARNING: PDB has overlapping SSE definitions'
                         ' ' + str(prevsse) + ' and ' + str(sse) + ': ')
        # remove overlap by shortening the longer one
        # FIXME: this is ignoring insertion codes etc., really
        # should convert to proper sequential residue sequence numbers
        # to do this
        # get_int_icode() is presumed to split a residue identifier into
        # its integer number and insertion code -- confirm against helper.
        (prevsse_start, prevsse_start_icode) = get_int_icode(prevsse[1])
        (prevsse_end, prevsse_end_icode) = get_int_icode(prevsse[2])
        (sse_start, sse_start_icode) = get_int_icode(sse[1])
        (sse_end, sse_end_icode) = get_int_icode(sse[2])
        if prevsse_end_icode or sse_start_icode:
            # Integer arithmetic on the boundary is not meaningful when
            # an insertion code is present, so leave this pair untouched.
            sys.stderr.write('contains insertion codes, giving up\n')
            is_valid = False
            continue

        prevsse_len = prevsse_end - prevsse_start + 1
        sse_len = sse_end - sse_start + 1
        overlap = prevsse_end - sse_start + 1
        # NOTE(review): if both SSEs start at the same residue number,
        # the else branch leaves prevsse with end == start - 1 (an empty
        # range); that case is not handled specially here.
        if sse_len > prevsse_len:
            sse_start += overlap
        else:
            prevsse_end -= overlap
        sselist[i] = (sse[0], str(sse_start), str(sse_end),
                      sse[3], sse[4], sse[5])
        sselist[i - 1] = (prevsse[0], str(prevsse_start), str(prevsse_end),
                          prevsse[3], prevsse[4], prevsse[5])
        sys.stderr.write('changed to ' + str(sselist[i - 1]) + ' and ' +
                         str(sselist[i]) + '\n')

    # rebuild the helix_list and strand_list with our modified tuples
    self.helix_list = [(chain, start, endchain, end, htype)
                       for (chain, start, end, endchain, ssetype, htype)
                       in sselist if ssetype == 'H']
    self.strand_list = [(chain, start, endchain, end)
                        for (chain, start, end, endchain, ssetype, htype)
                        in sselist if ssetype == 'E']
    return is_valid