def GetTopoAlignStat(topo1, topo2):
    """Describe what topo2 contains at each TM region of topo1.

    For every (begin, end) region returned by myfunc.GetTMPosition(topo1),
    the characters of topo2 at the positions where topo1 is 'M' are
    collected and classified as 'M', '-' or anything else.

    Returns a list with one dict per region, holding
        'seg2'    the collected topo2 characters joined into one string
        'freqTM'  myfunc.FloatDivision(number of 'M', segment size)
        'freqGap' myfunc.FloatDivision(number of '-', segment size)
        'freqSeq' myfunc.FloatDivision(number of other chars, segment size)
    """
    statList = []
    for (begTM, endTM) in myfunc.GetTMPosition(topo1):
        # Only positions that are 'M' in topo1 contribute to the segment.
        alignedChars = [topo2[pos] for pos in xrange(begTM, endTM)
                        if topo1[pos] == 'M']
        segSize = len(alignedChars)
        numTM = alignedChars.count('M')
        numGap = alignedChars.count('-')
        numOther = segSize - numTM - numGap

        record = {}
        record['freqTM'] = myfunc.FloatDivision(numTM, segSize)
        record['freqGap'] = myfunc.FloatDivision(numGap, segSize)
        record['freqSeq'] = myfunc.FloatDivision(numOther, segSize)
        record['seg2'] = ''.join(alignedChars)
        statList.append(record)
    return statList
def MaskTopologyBySignalPeptide(idList, topoList, signalpDict): newTopoList = [] for i in xrange(len(idList)): topo = topoList[i] if idList[i] in signalpDict: posTMList = myfunc.GetTMPosition(topo) try: posSigP = signalpDict[idList[i]] (b,e) = (posTMList[0][0],posTMList[0][1]) cov = myfunc.coverage(0, posSigP, b, e) if float(cov)/(e-b) > 0.5: #mask masked_state = topo[e] newTopo = ( "".join([masked_state]*(e)) + topo[e:]) newTopoList.append(newTopo) if DEBUG: print print "posTM", (b,e), "SignalPeptide", posSigP print topo print newTopo else: newTopoList.append(topo) except (KeyError, IndexError): newTopoList.append(topo) else: newTopoList.append(topo) return newTopoList
def main(g_params): #{{{
    """Print the TM positions of every topology in a topology file.

    Arguments are read from sys.argv:
        FILE, -i FILE, --i FILE   input file, read with myfunc.ReadFasta
        -o FILE, --o FILE         output file, opened with myfunc.myopen using
                                  sys.stdout as the default
        -gapless, --gapless       remove '-' and '.' from each topology first
        -q                        set g_params['isQuiet'] to True
        -h, --help                print the help text
        --                        take the next argument as the input file

    One line "<seqid> <posTMList>" is written per record, where posTMList is
    the value returned by myfunc.GetTMPosition.

    Returns 0 on success and 1 on a usage error or when reading/writing
    fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    outfile = ""
    isGapLess = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            topofile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            # startswith() also copes with an empty-string argument, which
            # would make arg[0] raise IndexError.
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-i", "--i"]:
                # These options consume the following argument; refuse to
                # run instead of crashing when it is missing.
                if i + 1 >= numArgv:
                    print >> sys.stderr, \
                        "Error! Option %s requires an argument" % (arg)
                    return 1
                if arg in ["-o", "--o"]:
                    outfile = argv[i + 1]
                else:
                    topofile = argv[i + 1]
                i += 2
            elif arg in ["-gapless", "--gapless"]:
                isGapLess = True
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            topofile = arg
            i += 1

    if topofile == "":
        print >> sys.stderr, "topofile not set. exit"
        return 1

    try:
        (idList, annoList, seqList) = myfunc.ReadFasta(topofile)
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        for idx, seqid in enumerate(idList):
            topo = seqList[idx]
            if isGapLess:
                topo = topo.replace("-", "").replace(".", "")
            posTMList = myfunc.GetTMPosition(topo)
            print >> fpout, seqid, posTMList
        myfunc.myclose(fpout)
    except (IOError, IndexError) as e:
        # Report the failure instead of silently pretending success.
        print >> sys.stderr, "Failed to process topofile %s: %s" % (
            topofile, e)
        return 1
    return 0
def IsIdenticalTopology_simple( topo1, topo2, min_TM_overlap = 5):#{{{
    """Check whether topo1 and topo2 are identical.

    Two topologies are considered identical (Krogh et al. 2001) if
      1. they have the same number of TM helices (myfunc.CountTM),
      2. they have the same N-terminal state (GetNtermState), and
      3. each pair of corresponding helices shares at least min_TM_overlap
         positions at which both topologies are 'M'.

    Returns True or False.
    """
    # Created 2011-11-15, updated 2011-11-15
    numTM1 = myfunc.CountTM(topo1)
    numTM2 = myfunc.CountTM(topo2)
    Nterm1 = GetNtermState(topo1)
    Nterm2 = GetNtermState(topo2)
    posTM1 = myfunc.GetTMPosition(topo1)
    posTM2 = myfunc.GetTMPosition(topo2)

    if numTM1 != numTM2:
        return False
    if Nterm1 != Nterm2:
        return False

    for idxTM in range(numTM1):
        (b1, e1) = posTM1[idxTM]
        (b2, e2) = posTM2[idxTM]
        sharedBeg = max(b1, b2)
        sharedEnd = min(e1, e2)
        if sharedEnd - sharedBeg <= 0:
            # The two helices do not overlap at all.
            return False

        numSharedM = 0
        for pos in range(sharedBeg, sharedEnd):
            if topo1[pos] == 'M' and topo2[pos] == 'M':
                numSharedM += 1
            if numSharedM >= min_TM_overlap:
                # Enough common TM positions found; no need to count on.
                break
        if numSharedM < min_TM_overlap:
            return False
    return True
def MatchTopology(targetTopo, topoList, min_TM_overlap = 5, seqid = ""):#{{{
    """Compare targetTopo to all topologies in topoList.

    Each entry of topoList yields one code in matchList:
         1  identical to targetTopo according to IsIdenticalTopology
         0  different topology
        -1  empty topology ("")

    seqid is accepted but not used by the comparison.

    Returns (matchList, numIDTtopo, numPredictor), where numIDTtopo is the
    number of 1 entries and numPredictor the number of 1 and 0 entries.
    """
    NtermStateTarget = GetNtermState(targetTopo)
    posTMtarget = myfunc.GetTMPosition(targetTopo)
    numTMtarget = len(posTMtarget)

    matchList = []
    for topo in topoList:
        if topo == "":
            matchList.append(-1)
            continue
        NtermState = GetNtermState(topo)
        posTM = myfunc.GetTMPosition(topo)
        numTM = len(posTM)
        isSame = IsIdenticalTopology(NtermStateTarget, NtermState,
                                     numTMtarget, numTM, posTMtarget, posTM,
                                     targetTopo, topo, min_TM_overlap)
        if isSame:
            matchList.append(1)
        else:
            matchList.append(0)

    numIDTtopo = matchList.count(1)
    numPredictor = numIDTtopo + matchList.count(0)
    return (matchList, numIDTtopo, numPredictor)
def CalKRBias(seq, topo, flank_win, max_dist):
    """Collect the GetKRPos results around the TM helices and their bias.

    The sequence is cut into numTM + 1 windows: one before the first helix,
    one between every pair of neighbouring helices and one after the last
    helix.  Each window reaches flank_win positions into the adjacent helix
    and at most max_dist positions away from it; a region between two
    helices that is longer than 2 * max_dist is covered by two separate
    windows whose results are concatenated.

    Returns (kr_bias, KR_pos_list, numTM):
        kr_bias      total GetKRPos hits in windows 0, 2, 4, ... minus the
                     hits in windows 1, 3, 5, ...; None when there is no TM
                     helix
        KR_pos_list  one GetKRPos result per window, e.g. [[1,5], [1,3]]
        numTM        number of regions returned by myfunc.GetTMPosition
    """
    posTM = myfunc.GetTMPosition(topo)
    # The N-terminal state is not used below; the call is kept so that the
    # function behaves exactly as before.
    lcmp.GetNtermState(topo)
    numTM = len(posTM)
    seqLength = len(seq)

    KR_pos_list = []
    if numTM < 1:
        return (None, KR_pos_list, numTM)

    # sumByParity[0]: windows with an even index, sumByParity[1]: odd index.
    sumByParity = [0, 0]
    for idx in range(numTM + 1):
        if idx == 0:
            # Window in front of the first helix.
            firstBeg = posTM[0][0]
            KRpos = GetKRPos(seq, max(0, firstBeg - max_dist),
                             firstBeg + flank_win)
        elif idx == numTM:
            # Window behind the last helix.
            lastEnd = posTM[idx - 1][1]
            KRpos = GetKRPos(seq, lastEnd - flank_win,
                             min(seqLength, lastEnd + max_dist))
        else:
            prevEnd = posTM[idx - 1][1]
            nextBeg = posTM[idx][0]
            if nextBeg - prevEnd > 2 * max_dist:
                # Long region: only look near each of the two helices.
                KRpos = GetKRPos(seq, prevEnd - flank_win, prevEnd + max_dist)
                KRpos += GetKRPos(seq, nextBeg - max_dist,
                                  nextBeg + flank_win)
            else:
                KRpos = GetKRPos(seq, prevEnd - flank_win,
                                 nextBeg + flank_win)
        KR_pos_list.append(KRpos)
        sumByParity[idx % 2] += len(KRpos)

    kr_bias = sumByParity[0] - sumByParity[1]
    return (kr_bias, KR_pos_list, numTM)
def IsAllIdenticalTopology(topoList): #{{{
    """Return True when every pair of topologies in topoList is identical.

    Pairs are compared with lcmp.IsIdenticalTopology; the first pair that
    differs makes the function return False.  A list with zero or one
    topology is always considered identical.

    NOTE(review): min_TM_overlap is neither a parameter nor a local of this
    function, so it must exist as a module-level name -- confirm.
    """
    numSeq = len(topoList)
    if numSeq <= 1:
        return True

    allPosTM = [myfunc.GetTMPosition(topo) for topo in topoList]
    allNterm = [lcmp.GetNtermState(topo) for topo in topoList]
    allNumTM = [len(posTM) for posTM in allPosTM]

    for first in xrange(numSeq - 1):
        for second in xrange(first + 1, numSeq):
            isSame = lcmp.IsIdenticalTopology(
                allNterm[first], allNterm[second],
                allNumTM[first], allNumTM[second],
                allPosTM[first], allPosTM[second],
                topoList[first], topoList[second],
                min_TM_overlap)
            if not isSame:
                return False
    return True
def RunHHSearchPairwise(tableinfoFile, #{{{ hhprofilepathList, hhprofilepathMapDictList, hhsearchpathList, hhsearchpathMapDictList, topoDict, outpath, dupfile): fpoutDup = None if dupfile != "": fpoutDup = myfunc.myopen(dupfile, sys.stdout, "w", False) hdl = myfunc.ReadLineByBlock(tableinfoFile) if hdl.failure: return 1 cnt = 0 lines = hdl.readlines() while lines != None: for line in lines: if not line or line[0] == "#": continue strs = line.split() try: seqid1 = strs[0] seqid2 = strs[1] except (IndexError, ValueError): print >> sys.stderr, "Bad record line \"%s\""%(line) continue try: topo1 = topoDict[seqid1] except KeyError: topo1 = "" try: topo2 = topoDict[seqid2] except KeyError: topo2 = "" seqlen1 = len(topo1) seqlen2 = len(topo2) pairlist = [(seqid1, seqlen1), (seqid2, seqlen2)] pairlist = sorted(pairlist, key=lambda x:x[1], reverse=False) # short - long hhrfile = "%s%s%s_%s.hhr"%(outpath, os.sep, seqid1, seqid2) if g_params['isUsePreBuildHHSearchResult']: keystr = "%s_%s"%(seqid1, seqid2) tmp_hhrfile = GetProfileFileName(hhsearchpathList, hhsearchpathMapDictList, keystr, ".hhr") if os.path.exists(tmp_hhrfile): hhrfile = tmp_hhrfile else: print >> sys.stderr, "hhrfile %s does not exist in"\ " the prebuilt path"%(hhrfile) # update seqid1 and seqid2 (shorter - longer) seqid1 = pairlist[0][0] # shorter sequence seqid2 = pairlist[1][0] # longer sequence try: topo1 = topoDict[seqid1] except KeyError: topo1 = "" try: topo2 = topoDict[seqid2] except KeyError: topo2 = "" seqlen1 = len(topo1) seqlen2 = len(topo2) numTM1 = len(myfunc.GetTMPosition(topo1)) numTM2 = len(myfunc.GetTMPosition(topo2)) if not os.path.exists(hhrfile) or g_params['isForceOverWrite']: a3mfile = GetProfileFileName(hhprofilepathList, #query hhprofilepathMapDictList, pairlist[0][0], ".a3m") hhmfile = GetProfileFileName(hhprofilepathList, #template hhprofilepathMapDictList, pairlist[1][0], ".hhm") if a3mfile == "" or not os.path.exists(a3mfile): print >> sys.stderr, "a3mfile not found for %s. 
Ignore." %(pairlist[0][0]) elif hhmfile == "" or not os.path.exists(hhmfile): print >> sys.stderr, "hhmfile not found for %s. Ignore." %(pairlist[1][0]) else: tmp_hhrfile = "%s.tmp"%(hhrfile) cmd = "hhsearch -i %s -d %s -o %s -v 0 -nocons -nopred -nodssp" % ( a3mfile, hhmfile, tmp_hhrfile) os.system(cmd) if os.path.exists(tmp_hhrfile): os.system("/bin/mv -f %s %s"%(tmp_hhrfile, hhrfile)) print hhrfile, "output" if fpoutDup and os.path.exists(hhrfile): ss_isdup = "" hitinfo = {} # if IsDuplicatedByHHSearch(hhrfile, seqid1, seqid2, cnt): # ss_isdup = 'y' # else: # ss_isdup = 'n' hitinfo = CheckDuplication(hhrfile, seqid1, seqid2, topoDict, cnt) if hitinfo != {}: fpoutDup.write("%s-%s %s %4d %4d %4d %4d" %( seqid1, seqid2, hitinfo['isDup'], len(topo1), len(topo2), numTM1, numTM2)) if 'hit' in hitinfo: for j in xrange(len(hitinfo['hit'])): hit = hitinfo['hit'][j] ss_hit = "%d-%d(nTM=%d) %d-%d(nTM=%d)"%( hit['posQuery'][0], hit['posQuery'][1], hit['numTM1'], hit['posTemplate'][0], hit['posTemplate'][1], hit['numTM2']) fpoutDup.write(" | %35s"%(ss_hit)) fpoutDup.write("\n") cnt += 1 lines = hdl.readlines() hdl.close() myfunc.myclose(fpoutDup) return 0
def CheckDuplication(hhrfile, seqid1, seqid2, topoDict, cnt):#{{{
    """Read an hhsearch result file and decide whether the pair is duplicated.

    The hit table following the " No Hit" header line is parsed with
    ExtractHit until the first line that yields no hit or a hit with an
    e-value above EVALUE_THRESHOLD.  Every accepted hit is annotated with
    'numTM1' and 'numTM2', the number of TM regions of the query (seqid1)
    and template (seqid2) topology inside the aligned range (0 when the
    topology is not in topoDict).  With at least two accepted hits the
    decision is taken by IsDuplicated, otherwise the pair is not duplicated.

    A summary line, prefixed with cnt, is written to sys.stdout.

    Returns a dict with
        'seqLen1'  value of the "Match_columns" line (only when that line
                   occurs before the hit table)
        'hit'      list of accepted hits sorted by ascending e-value
        'isDup'    'y' or 'n'
    or {} when the file cannot be read or the "Match_columns" line is
    malformed.
    """
    hitinfo = {}
    try: # Read in hhsearch hits
        fpin = open(hhrfile,"r")
        try:
            lines = fpin.readlines()
        finally:
            # Close the file even when reading fails half-way.
            fpin.close()
    except IOError:
        print >> sys.stderr, "Failed to read hhrfile %s"%hhrfile
        return {}

    try:
        topo1 = topoDict[seqid1]
    except KeyError:
        topo1 = ""
    try:
        topo2 = topoDict[seqid2]
    except KeyError:
        topo2 = ""

    hitList = []
    numLine = len(lines)
    i = 0
    while i < numLine:
        line = lines[i]
        if line.startswith("Match_columns"):
            try:
                lengthQuery = int(line.split()[1])
                hitinfo['seqLen1'] = lengthQuery
            except (IndexError, ValueError):
                print >> sys.stderr, "Error in hhrfile %s. Ignore"%(hhrfile)
                return {}
            i += 1
        elif line.startswith(" No Hit"):
            # The hit table starts right after this header line.
            j = 1
            while i+j < numLine and lines[i+j] != "":
                hit = ExtractHit(lines[i+j])
                if hit != {} and hit['evalue'] <= EVALUE_THRESHOLD:
                    posQuery = hit['posQuery']
                    posTemplate = hit['posTemplate']
                    if topo1 != "":
                        hit['numTM1'] = len(myfunc.GetTMPosition(
                            topo1[posQuery[0]:posQuery[1]]))
                    else:
                        hit['numTM1'] = 0
                    if topo2 != "":
                        hit['numTM2'] = len(myfunc.GetTMPosition(
                            topo2[posTemplate[0]:posTemplate[1]]))
                    else:
                        hit['numTM2'] = 0
                    hitList.append(hit)
                else:
                    # First unparsable or non-significant line ends the table.
                    break
                j += 1
            break
        else:
            i += 1

    hitList = sorted(hitList, key=lambda x:x['evalue'], reverse=False)
    hitinfo['hit'] = hitList
    numHit = len(hitList)

    if numHit < 2: # there should be at least two hits
        isDup = False
    else:
        isDup = IsDuplicated(hitList, len(topo1), len(topo2))

    if isDup:
        ss_isdup = 'y'
    else:
        ss_isdup = 'n'
    hitinfo['isDup'] = ss_isdup

    sys.stdout.write("%d: %s-%s %s numHit=%d\n" %(cnt, seqid1, seqid2,
        ss_isdup, numHit))
    return hitinfo
def main(g_params):#{{{
    """Write every TM segment of every topology in a file, one per line.

    Arguments are read from sys.argv:
        FILE                          input topology file
        -outfile FILE, --outfile FILE output file, opened with myfunc.myopen
                                      using sys.stdout as the default
        -seqdb FILE, --seqdb FILE     accepted, but not used at present
        -q                            set g_params['isQuiet'] to True
        -h, --help                    print the help text
        --                            take the next argument as the input file

    The input is read in blocks of BLOCK_SIZE and parsed with
    myfunc.ReadFastaFromBuffer.  For each region (b, e) returned by
    myfunc.GetTMPosition a line "<seqid>\\t<index>\\t<topo[b:e]>" is written,
    where index counts the regions of that record starting from 1.

    Returns 0 on success and 1 on a usage error or when the input file
    cannot be opened.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    seqdbfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            # startswith() also copes with an empty-string argument, which
            # would make arg[0] raise IndexError.
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outfile", "--outfile", "-seqdb", "--seqdb"]:
                # These options consume the following argument; refuse to
                # run instead of crashing when it is missing.
                if i + 1 >= numArgv:
                    print >> sys.stderr, \
                        "Error! Option %s requires an argument" % (arg)
                    return 1
                if arg in ["-outfile", "--outfile"]:
                    outfile = argv[i+1]
                else:
                    seqdbfile = argv[i+1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        print >> sys.stderr, "topofile not set"
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "topofile %s does not exist"%(infile)
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    # open() raises IOError on failure, it never returns a false value.
    try:
        fpin = open(infile, "rb")
    except IOError:
        print >> sys.stderr, "Failed to open input file %s"%(infile)
        myfunc.myclose(fpout)
        return 1

    try:
        unprocessedBuffer = ""
        isEOFreached = False
        while 1:
            buff = fpin.read(BLOCK_SIZE)
            if len(buff) < BLOCK_SIZE:
                isEOFreached = True
            # Prepend what the previous round could not parse yet.
            buff = unprocessedBuffer + buff
            recordList = []
            unprocessedBuffer = myfunc.ReadFastaFromBuffer(buff, recordList,
                    isEOFreached)
            for record in recordList:
                seqid = record[0]
                topo = record[2]
                posTM = myfunc.GetTMPosition(topo)
                for (cnt, (b, e)) in enumerate(posTM):
                    seg = topo[b:e]
                    fpout.write("%s\t%4d\t%s\n"%(seqid, cnt+1, seg))
            if isEOFreached:
                break
    finally:
        # Release both files even when parsing or writing fails.
        fpin.close()
        myfunc.myclose(fpout)
    return 0
def CountIdenticalTopology(pred_topodict, real_topodict, agreement, TM_type, fpout_wrong, seqDict, pred_method_item): #{{{
    """Count predicted topologies that are identical or inverted.

    Every topology in pred_topodict is compared with the topology stored
    under the same ID in real_topodict.  When the ID is missing there, a
    message is printed and a topology made only of 'i' with the length of
    the prediction is used as the real one.

    Predictions that are not identical (lcmp.IsIdenticalTopology) are written
    to fpout_wrong unless it is None, together with the amino acid sequence
    from seqDict when available, and are additionally tested with
    lcmp.IsInvertedTopology.  The overlap threshold for both tests is
    g_params['min_TM_overlap'].

    TM_type is accepted but not used; agreement only appears in the debug
    output printed when g_params['isDEBUG'] is set.

    return (cntIDT, cntINV)
    """
    numIdentical = 0
    numInverted = 0
    numDifferent = 0
    for seqid in pred_topodict:
        predtopo = pred_topodict[seqid]
        try:
            realtopo = real_topodict[seqid]
        except KeyError:
            print >> sys.stderr, "%s a nonTM protein predicted as TM protein" % (
                seqid)
            realtopo = "i" * len(predtopo)

        pred_NtermState = lcmp.GetNtermState(predtopo)
        real_NtermState = lcmp.GetNtermState(realtopo.replace('.', '-'))
        pred_posTM = myfunc.GetTMPosition(predtopo)
        real_posTM = myfunc.GetTMPosition(realtopo)
        pred_numTM = len(pred_posTM)
        real_numTM = len(real_posTM)

        if lcmp.IsIdenticalTopology(pred_NtermState, real_NtermState,
                                    pred_numTM, real_numTM, pred_posTM,
                                    real_posTM, predtopo, realtopo,
                                    g_params['min_TM_overlap']):
            numIdentical += 1
            continue

        # From here on the prediction differs from the real topology.
        if fpout_wrong is not None:  # output the wrongly predict topology
            fpout_wrong.write(">%s Number %d mtd_%s\n" %
                              (seqid, numDifferent + 1, pred_method_item))
            try:
                seq = seqDict[seqid]
            except KeyError:
                pass
            else:
                fpout_wrong.write("%-10s %s\n" % ("AASeq", seq))
            fpout_wrong.write("%-10s %s\n" % ("RealTop", realtopo))
            fpout_wrong.write("%-10s %s\n" % ("PredTop", predtopo))
            fpout_wrong.write("\n")

        if lcmp.IsInvertedTopology(pred_NtermState, real_NtermState,
                                   pred_numTM, real_numTM, pred_posTM,
                                   real_posTM, predtopo, realtopo,
                                   g_params['min_TM_overlap']):
            numInverted += 1

        if g_params['isDEBUG']:
            print >> sys.stderr, "%-7s(real %3s) nTM=%2d %s" % (
                seqid, agreement, real_numTM, realtopo)
            print >> sys.stderr, "%-7s(pred %3s) nTM=%2d %s" % (
                seqid, agreement, pred_numTM, predtopo)
            print >> sys.stderr
        numDifferent += 1
    return (numIdentical, numInverted)
def WriteInfo(groupList, seqlenDict, seqannoDict, pfamidDefDict,#{{{
        clanidDefDict, topoDict, groupedPfamScanDict, htmlname, fpout):
    """Write a text report of domain groups and the TM helices they cover.

    Each item of groupList is indexed as
        tup[0]  tab-separated family IDs of the group
        tup[1]  number of sequences
        tup[2]  list of sequence IDs
    A family ID starting with 'P' is described through pfamidDefDict, any
    other through clanidDefDict.  For every sequence one row is written with
    its length (-1 when unknown), its number of TM regions and, per family,
    the aligned range from groupedPfamScanDict together with the TM regions
    reported by GetCoveredTM for that range.

    A KeyError or IndexError raised while handling a group is reported on
    sys.stderr and ends the output for that group.
    NOTE(review): indexList_covered[0] raises IndexError when GetCoveredTM
    returns an empty index list, which would trigger exactly that -- confirm
    this is intended.

    seqannoDict and htmlname are accepted but not used.
    """
    for (groupIdx, tup) in enumerate(groupList):
        try:
            ss = tup[0]
            seqidlist = tup[2]
            famidlist = ss.split("\t")
            fpout.write("Group %d: %d seqs, %d domains "%(groupIdx+1, tup[1],
                len(famidlist)))
            for famid in famidlist:
                if famid[0] == 'P':
                    defDict = pfamidDefDict
                else:
                    defDict = clanidDefDict
                fpout.write(" %s (%s)"%(famid, defDict[famid]))
            fpout.write("\n")
            fpout.write("#%-3s %10s %4s %3s %15s %5s\n"%("No", "SeqID", "Len",
                "nTM", "DomainCoverage", "nTM_within"))

            # rowNo only advances for sequences that have a topology.
            rowNo = 0
            for seqid in seqidlist:
                try:
                    seqlen = seqlenDict[seqid]
                except KeyError:
                    seqlen = -1
                fpout.write("%-4d %10s %4d"%(rowNo+1, seqid, seqlen))

                try:
                    topo = topoDict[seqid]
                    posTM = myfunc.GetTMPosition(topo)
                except KeyError:
                    print >> sys.stderr, "topo not found for %s"%seqid
                    fpout.write("\n")
                    continue
                fpout.write(" %3d"%(len(posTM)))

                pfamscan_hit = groupedPfamScanDict[seqid]
                for famid in famidlist:
                    try:
                        b1 = pfamscan_hit[famid]['alnBeg']
                        e1 = pfamscan_hit[famid]['alnEnd']
                        (posTM_covered, indexList_covered) = GetCoveredTM(
                                (b1,e1), posTM)
                        rangeStr = "(%d,%d)"%(b1,e1)
                        numCoveredStr = "%d TM"%(len(posTM_covered))
                        indexStr = "%d-%d"%(indexList_covered[0]+1,
                                indexList_covered[-1]+1)
                        fpout.write("%15s %5s %4s"%(rangeStr, numCoveredStr,
                            indexStr))
                    except (KeyError):
                        print >> sys.stderr, "%s not in pfamscan_hit"%(famid)
                fpout.write("\n")
                rowNo += 1
        except (KeyError, IndexError):
            print >> sys.stderr, "Error for %s"%(tup[0])
        fpout.write("\n")
def AnaLocalTopoAln(idList, topoList, localseqpairDict, fpout, fpout1):
    """Analyse the parts of pairwise topology alignments outside the local alignment.

    idList and topoList hold the pairs consecutively: entries 2*i and 2*i+1
    belong to pair i.  localseqpairDict[(id1, id2)][2] is a string in which
    the locally aligned region is marked with "1"; pairs without such an
    entry, or with a number of "1" segments different from one, are reported
    on sys.stderr and skipped.  Pairs whose aligned region spans the whole
    alignment are reported on stdout as "Full aligned".

    For all other pairs the N-terminal and C-terminal parts outside the
    aligned region are inspected for TM regions in both topologies.

    fpout:  one line per pair selected by GetUnglianedTermStatus
            (original note: those with one end with no TM region)
    fpout1: one line per pair selected by GetUnglianedTermStatus1
            (original note: those with one end with one TM region but less
            than 5 residues of TM region)
    """
    # fpout: write result for those one end with no TM region
    # fpout1: write result for those one end with one TM region but less than 5
    # residues of TM region
    numseq = len(idList)
    numpair = numseq / 2  # integer division under Python 2
    for i in xrange(numpair):
        id1 = idList[2 * i]
        id2 = idList[2 * i + 1]
        topo1 = topoList[2 * i]
        topo2 = topoList[2 * i + 1]
        lengthAln = len(topo1)
        try:
            unaligned_str = localseqpairDict[(id1, id2)][2]
        except (KeyError, IndexError):
            print >> sys.stderr, "no local alignment found for %s %s" % (id1, id2)
            continue
        # Segments of "1" mark the locally aligned region.
        alignedPosList = myfunc.GetSegPos(unaligned_str, "1")
        if len(alignedPosList) != 1:
            print >> sys.stderr, "aligned region not equal 1 for %s %s" % (id1, id2)
            continue
        else:
            alignedPos = alignedPosList[0]
        if alignedPos[0] == 0 and alignedPos[1] == lengthAln:
            print "%s %s: Full aligned" % (id1, id2)
        else:
            alignedPos = alignedPosList[0]
            # Split both topologies into the parts before and after the
            # aligned region.
            topo_Nterm1 = topo1[:alignedPos[0]]
            topo_Cterm1 = topo1[alignedPos[1]:]
            topo_Nterm2 = topo2[:alignedPos[0]]
            topo_Cterm2 = topo2[alignedPos[1]:]
            posTM_Nterm1 = myfunc.GetTMPosition(topo_Nterm1)
            posTM_Cterm1 = myfunc.GetTMPosition(topo_Cterm1)
            posTM_Nterm2 = myfunc.GetTMPosition(topo_Nterm2)
            posTM_Cterm2 = myfunc.GetTMPosition(topo_Cterm2)
            # NOTE(review): the exact meaning of the status codes is defined
            # by GetUnglianedTermStatus / GetUnglianedTermStatus1, which are
            # not visible here; only the comparisons below are certain.
            s_Nterm = GetUnglianedTermStatus(posTM_Nterm1, posTM_Nterm2)
            s_Cterm = GetUnglianedTermStatus(posTM_Cterm1, posTM_Cterm2)
            # s1_Nterm, s1_Cterm are used to record the status of those
            # unaligned terminals where one has a split TM helix and
            # another has >= 1 TM helix
            s1_Nterm = GetUnglianedTermStatus1(posTM_Nterm1, posTM_Nterm2)
            s1_Cterm = GetUnglianedTermStatus1(posTM_Cterm1, posTM_Cterm2)
            if s_Nterm < 2 and s_Cterm < 2 and (s_Nterm + s_Cterm) > 0:
                # N-terminal part: the topology that has TM regions supplies
                # the TM count and the distance from its last TM region to
                # the aligned region; the other one supplies the number of
                # non-gap residues.
                if len(posTM_Nterm1) > 0:
                    num_res_unaligned_Nterm = len(topo_Nterm2.replace("-", ""))
                    numTM_unaligned_Nterm = len(posTM_Nterm1)
                    num_res_to_TM_Nterm = len(topo_Nterm1) - posTM_Nterm1[
                        len(posTM_Nterm1) - 1][1]
                elif len(posTM_Nterm2) > 0:
                    num_res_unaligned_Nterm = len(topo_Nterm1.replace("-", ""))
                    numTM_unaligned_Nterm = len(posTM_Nterm2)
                    num_res_to_TM_Nterm = len(topo_Nterm2) - posTM_Nterm2[
                        len(posTM_Nterm2) - 1][1]
                else:
                    num_res_unaligned_Nterm = 0
                    numTM_unaligned_Nterm = 0
                    num_res_to_TM_Nterm = 0
                # C-terminal part: the distance is the start of the first TM
                # region, counted from the end of the aligned region.
                if len(posTM_Cterm1) > 0:
                    num_res_unaligned_Cterm = len(topo_Cterm2.replace("-", ""))
                    numTM_unaligned_Cterm = len(posTM_Cterm1)
                    num_res_to_TM_Cterm = posTM_Cterm1[0][0]
                elif len(posTM_Cterm2) > 0:
                    num_res_unaligned_Cterm = len(topo_Cterm1.replace("-", ""))
                    numTM_unaligned_Cterm = len(posTM_Cterm2)
                    num_res_to_TM_Cterm = posTM_Cterm2[0][0]
                else:
                    num_res_unaligned_Cterm = 0
                    numTM_unaligned_Cterm = 0
                    num_res_to_TM_Cterm = 0
                ss = "%s %s %4d %4d %4d %4d %4d %4d"
                print >> fpout, ss % (
                    id1, id2, num_res_unaligned_Nterm, num_res_to_TM_Nterm,
                    numTM_unaligned_Nterm, num_res_unaligned_Cterm,
                    num_res_to_TM_Cterm, numTM_unaligned_Cterm)
            if ((s1_Nterm == 1.5 or s1_Cterm == 1.5) and s1_Nterm < 2
                    and s1_Cterm < 2):
                # -1 marks a value that does not apply to this pair.
                num_res_unaligned_Nterm = -1
                numRes_PartHelix_Nterm = -1
                numTM_unaligned_Nterm = -1
                num_res_to_TM_Nterm = -1
                num_res_unaligned_Cterm = -1
                numRes_PartHelix_Cterm = -1
                numTM_unaligned_Cterm = -1
                num_res_to_TM_Cterm = -1
                if s1_Nterm == 1.5:
                    # The topology with a single TM region shorter than 5
                    # positions is the one holding the partial helix.
                    if len(
                            posTM_Nterm1
                    ) == 1 and posTM_Nterm1[0][1] - posTM_Nterm1[0][0] < 5:
                        num_res_unaligned_Nterm = len(
                            topo_Nterm1.replace("-", ""))
                        numRes_PartHelix_Nterm = posTM_Nterm1[0][
                            1] - posTM_Nterm1[0][0]
                        numTM_unaligned_Nterm = len(posTM_Nterm2)
                        num_res_to_TM_Nterm = len(topo_Nterm2) - posTM_Nterm2[
                            len(posTM_Nterm2) - 1][1]
                    elif len(
                            posTM_Nterm2
                    ) == 1 and posTM_Nterm2[0][1] - posTM_Nterm2[0][0] < 5:
                        num_res_unaligned_Nterm = len(
                            topo_Nterm2.replace("-", ""))
                        numRes_PartHelix_Nterm = posTM_Nterm2[0][
                            1] - posTM_Nterm2[0][0]
                        numTM_unaligned_Nterm = len(posTM_Nterm1)
                        num_res_to_TM_Nterm = len(topo_Nterm1) - posTM_Nterm1[
                            len(posTM_Nterm1) - 1][1]
                if s1_Cterm == 1.5:
                    # NOTE(review): num_res_to_TM_Cterm is computed here with
                    # the same "length minus end of last TM region" formula
                    # as the N-terminal side, whereas the fpout section above
                    # uses the start of the first TM region for the
                    # C-terminal side -- confirm this is intended.
                    if len(
                            posTM_Cterm1
                    ) == 1 and posTM_Cterm1[0][1] - posTM_Cterm1[0][0] < 5:
                        num_res_unaligned_Cterm = len(
                            topo_Cterm1.replace("-",
                                                ""))
                        numRes_PartHelix_Cterm = posTM_Cterm1[0][
                            1] - posTM_Cterm1[0][0]
                        numTM_unaligned_Cterm = len(posTM_Cterm2)
                        num_res_to_TM_Cterm = len(topo_Cterm2) - posTM_Cterm2[
                            len(posTM_Cterm2) - 1][1]
                    elif len(
                            posTM_Cterm2
                    ) == 1 and posTM_Cterm2[0][1] - posTM_Cterm2[0][0] < 5:
                        num_res_unaligned_Cterm = len(
                            topo_Cterm2.replace("-", ""))
                        numRes_PartHelix_Cterm = posTM_Cterm2[0][
                            1] - posTM_Cterm2[0][0]
                        numTM_unaligned_Cterm = len(posTM_Cterm1)
                        num_res_to_TM_Cterm = len(topo_Cterm1) - posTM_Cterm1[
                            len(posTM_Cterm1) - 1][1]
                ss = "%s %s %4d %4d %4d %4d %4d %4d %4d %4d"
                print >> fpout1, ss % (
                    id1, id2, num_res_unaligned_Nterm, numRes_PartHelix_Nterm,
                    num_res_to_TM_Nterm, numTM_unaligned_Nterm,
                    num_res_unaligned_Cterm, numRes_PartHelix_Cterm,
                    num_res_to_TM_Cterm, numTM_unaligned_Cterm)