def writeSPDB(self):
    """Write a simplified coordinate file next to the source PDB file.

    The output path is ``self.pdbfile + '.spdb'``. One line is written per
    atom in ``self.atoms``: ``x y z resSeq code``, where ``code`` is whatever
    ``AAmap.getAAmap`` returns for the atom's ``resName`` (presumably the
    one-letter residue code -- confirm against AAmap).
    """
    aamap = AAmap()
    # The context manager closes the file even if a write or lookup raises.
    with open(self.pdbfile + '.spdb', 'w') as fo:
        for a in self.atoms:
            fo.write('%f %f %f %d %s\n' %
                     (a.x, a.y, a.z, a.resSeq, aamap.getAAmap(a.resName)))
def __init__(self, nafile):
    """Parse an accessibility file into per-residue lookup tables.

    nafile: path of the file to read (presumably a NACCESS ``.rsa`` file --
        confirm). Its first four characters are stored as ``self.pdb``.

    Attributes populated:
        rsaDict  -- maps '<code><chain><resi>' to the ``rsa`` object built
                    from each line whose first three characters are 'RES'.
        resiDict -- maps '<code><accessible(key)>' to a space-separated
                    string of '<chain><resi>' entries (with trailing space).
        alphabet -- the fixed list ['B', 'E'].
    """
    self.pdb = nafile[0:4]
    self.rsaDict = {}
    self.resiDict = defaultdict(lambda: '')
    self.alphabet = ['B', 'E']

    aamap = AAmap()
    # Read everything up front and release the file handle immediately.
    with open(nafile) as fin:
        lines = [line.strip() for line in fin]

    for naline in lines:
        head = naline[0:3]
        if head == 'RES':
            r = rsa(naline)
            key = '%s%s%s' % (aamap.getAAmap(r.resn), r.chain, r.resi)
            # Must be stored before accessible(key) is called below.
            self.rsaDict[key] = r
            varkey = '%s%s' % (aamap.getAAmap(r.resn), self.accessible(key))
            varvalue = '%s%s%s ' % (self.resiDict[varkey], r.chain, r.resi)
            self.resiDict[varkey] = varvalue
        elif head == 'TOTAL':
            # NOTE(review): ``head`` is only three characters long, so this
            # comparison never matches and the TOTAL line is currently
            # ignored. The branch is kept dormant because enabling it would
            # put a plain list into rsaDict alongside the rsa objects, which
            # other methods may not expect. If the totals are wanted, test
            # ``naline.startswith('TOTAL')`` instead.
            key = 'TOTAL'
            self.rsaDict[key] = naline.split()
def resn2bfactor():
    """Replace every atom's B-factor with a number encoding its residue type.

    Reads the PDB path from ``sys.argv[2]`` and writes the modified atoms to
    ``<path minus its last four characters>_rb.pdb``. With fewer than three
    command-line arguments it prints a short usage note and returns.

    Raises KeyError if ``AAmap.getAAmap`` returns a code that is missing
    from the score table below.
    """
    if len(sys.argv) < 3:
        print('resn2bfactor(): replace b factor values with residue type.')
        print('resn2bfactor(): used for pymol spectrum b')
        return

    # Residue code -> numeric score. 'B' deliberately shares the value of 'D'.
    scoreValue = {
        'X': 0, '-': 0, '.': 0,
        'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8,
        'K': 9, 'L': 10, 'M': 11, 'N': 12, 'P': 13, 'Q': 14, 'R': 15,
        'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20,
        'B': 3,
    }

    aamap = AAmap()
    pdbfile = sys.argv[2]
    p = protein(pdbfile)
    # Drops the last four characters of the path (assumes a '.pdb' suffix).
    outfile = '%s_rb.pdb' % pdbfile[:-4]

    # The context manager closes the output even if a lookup or write raises.
    with open(outfile, 'w') as fout:
        for a in p.atoms:
            resCode = aamap.getAAmap(a.resName)
            newBFactor = scoreValue[resCode]
            print('new b-factor: [%s : %s] -> %d' %
                  (a.resName, resCode, newBFactor))
            a.tempFactor = newBFactor
            fout.write(a.writeAtom())
    print('Output file: %s' % outfile)
def getSeq(self):
    """Return the residue sequence of ``self.atoms`` as a string.

    A new residue is counted each time ``resSeq`` differs from the previous
    atom's ``resSeq``. As a side effect, ``self.resDict`` is filled with
    '<chainID><resSeq>' -> (sequence position, residue code at that position).
    """
    aamap = AAmap()
    seq = ''
    # None can never equal a real residue number. The previous sentinel of -1
    # silently dropped a first residue that is itself numbered -1.
    last_resSeq = None
    seqPos = 0
    for a in self.atoms:
        if last_resSeq != a.resSeq:
            seq = seq + aamap.getAAmap(a.resName)
            last_resSeq = a.resSeq
            key = '%s%s' % (a.chainID, a.resSeq)
            self.resDict[key] = (seqPos, seq[seqPos])
            seqPos += 1
    return seq
def getSeq(self):
    """Collect the sequence, residue list and per-residue atom groups.

    Walks ``self.atoms`` in order and starts a new residue whenever the
    ``resSeq`` of an atom differs from that of the atom before it.

    Side effect: ``self.resDict`` gains '<chainID><resSeq>' ->
    (sequence position, residue code at that position).

    Returns a 3-tuple:
        sequence    -- concatenated codes from ``AAmap.getAAmap``
        residues    -- list of (chainID, code, resSeq), one per residue
        atom_groups -- list of atom lists, one list per residue
    """
    aamap = AAmap()
    # Sentinel below any expected residue number (1a8v starts at -1).
    previous_resSeq = -9999
    sequence = ''
    residues = []
    atom_groups = []
    group = []

    for atom in self.atoms:
        if atom.resSeq == previous_resSeq:
            # Same residue as the previous atom: just extend its group.
            group.append(atom)
            continue

        sequence += aamap.getAAmap(atom.resName)
        previous_resSeq = atom.resSeq
        position = len(residues)
        self.resDict['%s%d' % (atom.chainID, atom.resSeq)] = (
            position, sequence[position])
        residues.append(
            (atom.chainID, aamap.getAAmap(atom.resName), atom.resSeq))

        # Close the group of the residue that just ended, then open a new one.
        if group:
            atom_groups.append(group)
        group = [atom]

    # Only a resSeq change flushes a group, so the last one is flushed here.
    if group:
        atom_groups.append(group)

    return sequence, residues, atom_groups
def writeFASTA(self):
    """Print the sequence in FASTA form and write it to ``self.pdb + '.fa'``.

    A residue is counted each time ``resSeq`` changes between consecutive
    atoms. The header is ``>{pdb}/1-{number of residues}``.
    """
    fafile = self.pdb + '.fa'
    aamap = AAmap()

    codes = []
    # None can never equal a real residue number. The previous sentinel of -1
    # skipped a first residue that is itself numbered -1.
    last_resSeq = None
    for a in self.atoms:
        if last_resSeq != a.resSeq:
            codes.append(aamap.getAAmap(a.resName))
            last_resSeq = a.resSeq

    seq = ''.join(codes) + '\n'
    header = '>%s/1-%d\n' % (self.pdb, len(codes))
    print(header + seq)
    # The context manager closes the file even if the write raises.
    with open(fafile, 'w') as fp:
        fp.write(header + seq)
def writeFASTA(self):
    """Write the residue sequence as a single FASTA record.

    The record is printed to stdout and saved to ``self.pdb + '.fa'``. The
    header has the form ``>{pdb}/1-{count}``, where ``count`` is the number
    of ``resSeq`` changes seen while walking ``self.atoms``.
    """
    fafile = self.pdb + '.fa'
    aamap = AAmap()

    letters = []
    count = 0
    # Use None rather than -1 as the "no residue yet" marker: a structure
    # whose first residue is numbered -1 would otherwise lose that residue.
    last_resSeq = None
    for a in self.atoms:
        if last_resSeq != a.resSeq:
            letters.append(aamap.getAAmap(a.resName))
            last_resSeq = a.resSeq
            count += 1

    record = '>%s/1-%d\n' % (self.pdb, count) + ''.join(letters) + '\n'
    print(record)
    # The context manager guarantees the handle is released on error too.
    with open(fafile, 'w') as fp:
        fp.write(record)
def main():
    """Cluster a structure's atoms and write the maximal proximity groups.

    Usage: ``python proc_dendrogram.py preffix cutoff``

    ``preffix`` is passed to ``protein()`` and is also the stem of every
    output file; ``cutoff`` is the distance threshold (float).

    Files written:
        <preffix>.resimap   atom index, chain+resSeq and residue code
        <preffix>.pdist     pairwise Euclidean distance for every atom pair
        <preffix>.hcluster  the complete-linkage merge table, one row per merge
        <preffix>.hcg       one line per cluster still flagged True at the end
    """
    if len(sys.argv) < 3:
        print('python proc_dendrogram.py preffix cutoff')
        # Stop here: without both arguments the indexing below would raise
        # IndexError (a bare ``exit`` expression does nothing).
        return

    preffix = sys.argv[1]
    cutoff = float(sys.argv[2])

    # load tip pdb file
    pr = protein(preffix)
    aamap = AAmap()
    n = len(pr.atoms)

    resimap = {}
    px = []
    print('writing %s.resimap ...' % (preffix))
    with open(preffix + '.resimap', 'w') as fr:
        for count, a in enumerate(pr.atoms):
            px.append((a.x, a.y, a.z))
            code = aamap.getAAmap(a.resName)
            resimap[count] = ('%s%d' % (a.chainID, a.resSeq), code)
            fr.write('%d %s%d %s\n' % (count, a.chainID, a.resSeq, code))
    x = np.array(px)

    # calculate pairwise distances, keyed 'i-j' with i < j
    pdist = {}
    print('writing %s.pdist ...' % (preffix))
    with open(preffix + '.pdist', 'w') as fo:
        for i in range(0, len(x)):
            for j in range(i + 1, len(x)):
                dist = np.linalg.norm(x[i] - x[j])
                pdist['%d-%d' % (i, j)] = dist
                fo.write('%d-%d : %f\n' % (i, j, dist))

    # for hc extraction
    hcdict = {}     # cluster id -> hc object
    hclist = []     # merged clusters in linkage order
    existdict = {}  # cluster id -> whether it is a currently kept contact group

    # Complete linkage; row i describes the merge that creates cluster n + i.
    m = linkage(x, "complete")

    print('writing %s.hcluster ...' % (preffix))
    with open(preffix + '.hcluster', 'w') as fo1:
        for i in range(0, len(m)):
            hcline = '%d %d %d %f %d' % (
                n + i, m[i, 0], m[i, 1], m[i, 2], m[i, 3])
            fo1.write(hcline + '\n')
            h = hc(hcline, n)
            hcdict[h.clusterID] = h
            hclist.append(h)

    # resolve leaves for each cluster
    print('resolving leaves ...')
    for h in hclist:
        h.getChildren(hcdict)

    print('iterating clusters for largest proximity contact ...')
    # Register every single atom as its own one-leaf cluster ...
    for i in range(0, n):
        leafstr = '%d %d %d 0.0 1' % (i, i, i)
        h = hc(leafstr, n)
        h.leaves = [i]
        hcdict[i] = h

    # ... and mark each of them as an existing contact group.
    for i in range(0, n):
        existdict[i] = True

    # The explicit ``== True`` / ``== False`` comparisons are kept on purpose:
    # the return type of the checkProximity helpers is not visible here, so
    # plain truthiness or ``is`` tests might behave differently.
    for h in hclist:
        if h.dist <= cutoff:
            if h.c1 in existdict and h.c2 in existdict:
                # both children have been checked before
                if existdict[h.c1] == True and existdict[h.c2] == True:
                    ret = checkProximity2(
                        hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                    existdict[h.clusterID] = ret
                    if ret == True:
                        # combine both and retire the sub clusters
                        existdict[h.c1] = False
                        existdict[h.c2] = False
                elif existdict[h.c1] == False or existdict[h.c2] == False:
                    existdict[h.clusterID] = False
            elif h.c1 in existdict and h.c2 not in existdict:
                if existdict[h.c1] == False:
                    # c1 is not a contact, so h cannot be; still resolve c2
                    existdict[h.clusterID] = False
                    existdict[h.c2] = checkProximity(
                        hcdict[h.c2], pdist, cutoff)
                elif existdict[h.c1] == True:
                    # c1 is a contact; resolve c2, then h = c1 and c2
                    ret = checkProximity(hcdict[h.c2], pdist, cutoff)
                    existdict[h.c2] = ret
                    if ret == False:
                        existdict[h.clusterID] = False
                    elif ret == True:
                        ret1 = checkProximity2(
                            hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                        existdict[h.clusterID] = ret1
                        if ret1 == True:
                            existdict[h.c1] = False
                            existdict[h.c2] = False
            elif h.c1 not in existdict and h.c2 in existdict:
                if existdict[h.c2] == False:
                    # c2 is not a contact, so h cannot be; still resolve c1
                    existdict[h.clusterID] = False
                    existdict[h.c1] = checkProximity(
                        hcdict[h.c1], pdist, cutoff)
                elif existdict[h.c2] == True:
                    # c2 is a contact; resolve c1, then h = c1 and c2
                    ret = checkProximity(hcdict[h.c1], pdist, cutoff)
                    existdict[h.c1] = ret
                    if ret == False:
                        existdict[h.clusterID] = False
                    elif ret == True:
                        ret1 = checkProximity2(
                            hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                        existdict[h.clusterID] = ret1
                        if ret1 == True:
                            existdict[h.c1] = False
                            existdict[h.c2] = False
            elif h.c1 not in existdict and h.c2 not in existdict:
                r1 = checkProximity(hcdict[h.c1], pdist, cutoff)
                existdict[h.c1] = r1
                r2 = checkProximity(hcdict[h.c2], pdist, cutoff)
                existdict[h.c2] = r2
                if r1 == False or r2 == False:
                    existdict[h.clusterID] = False
                elif r1 == True and r2 == True:
                    ret = checkProximity2(
                        hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                    # Record the outcome for the parent, as the sibling
                    # branches do. Without this, a successful merge retired
                    # both children but never registered the merged cluster.
                    existdict[h.clusterID] = ret
                    if ret == True:
                        existdict[h.c1] = False
                        existdict[h.c2] = False
        elif h.dist > cutoff:
            # Too far apart to merge; make sure both children are resolved.
            existdict[h.clusterID] = False
            if h.c1 not in existdict:
                existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)
            if h.c2 not in existdict:
                existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)

    # print out the result
    print('writing result into %s.hcg' % preffix)
    count = 0
    # The context manager flushes and closes the result file deterministically.
    with open(preffix + '.hcg', 'w') as fout:
        for hid in existdict:
            if existdict[hid] == True:
                fout.write('%s,%s\n' %
                           (preffix, hcdict[hid].writeLeaves(resimap)))
                count += len(hcdict[hid].leaves)
    print('%d leaves in total\n' % count)
def writeSPDB(self):
    """Dump every atom as ``x y z resSeq code`` to ``self.pdbfile + '.spdb'``.

    ``code`` is the value ``AAmap.getAAmap`` returns for the atom's
    ``resName`` (presumably the one-letter residue code -- confirm against
    AAmap). One line is written per entry of ``self.atoms``.
    """
    aamap = AAmap()
    # ``with`` ensures the file is closed even when a write or lookup fails.
    with open(self.pdbfile + '.spdb', 'w') as fo:
        for a in self.atoms:
            fo.write('%f %f %f %d %s\n' % (
                a.x, a.y, a.z, a.resSeq, aamap.getAAmap(a.resName)))