示例#1
0
文件: protein.py 项目: jkjium/pyMAVEN
 def writeSPDB(self):
     """Write one line per atom to the file ``self.pdbfile + '.spdb'``.

     Each line holds the atom's x, y and z values, its ``resSeq`` and the
     value ``AAmap.getAAmap`` returns for its ``resName``, separated by
     single spaces.
     """
     aamap = AAmap()
     # The context manager closes the file even when formatting or writing
     # one of the atoms raises part-way through the loop.
     with open(self.pdbfile + '.spdb', 'w') as fo:
         for a in self.atoms:
             fo.write('%f %f %f %d %s\n' %
                      (a.x, a.y, a.z, a.resSeq, aamap.getAAmap(a.resName)))
示例#2
0
	def __init__(self, nafile):
		"""Load the per-residue records found in the text file ``nafile``.

		Attributes set here:
		  self.pdb      -- the first four characters of ``nafile``.
		  self.rsaDict  -- for every line starting with 'RES', maps
		                   '<aa><chain><resi>' to the ``rsa`` object built
		                   from that line; the key 'TOTAL' maps to the
		                   whitespace-split fields of the line starting
		                   with 'TOTAL'.
		  self.resiDict -- maps '<aa><self.accessible(key)>' to a string of
		                   space-terminated '<chain><resi>' entries.
		  self.alphabet -- the list ['B', 'E'].

		Here <aa> is whatever ``AAmap.getAAmap`` returns for the residue name.
		"""
		self.pdb = nafile[0:4]
		self.rsaDict = {}
		self.resiDict = defaultdict(lambda: '')
		self.alphabet = ['B', 'E']
		aamap = AAmap()

		# Read everything up front and close the file deterministically.
		with open(nafile) as fin:
			lines = [line.strip() for line in fin]

		for naline in lines:
			if naline.startswith('RES'):
				r = rsa(naline)
				aa = aamap.getAAmap(r.resn)
				key = '%s%s%s' % (aa, r.chain, r.resi)
				# Store the record before calling self.accessible(key), which
				# is invoked with the key that was just registered.
				self.rsaDict[key] = r

				varkey = '%s%s' % (aa, self.accessible(key))
				varvalue = '%s%s%s ' % (self.resiDict[varkey], r.chain, r.resi)
				self.resiDict[varkey] = varvalue
			elif naline.startswith('TOTAL'):
				# The previous test compared a 3-character slice to 'TOTAL'
				# (never equal) and wrote to a misspelled, non-existent
				# attribute; match the full prefix and use rsaDict.
				self.rsaDict['TOTAL'] = naline.split()
示例#3
0
def resn2bfactor():
	"""Rewrite a structure file with each atom's tempFactor set by residue type.

	Reads the input path from ``sys.argv[2]``, loads it with ``protein``,
	replaces every atom's ``tempFactor`` with the integer that ``scoreValue``
	assigns to ``AAmap.getAAmap(atom.resName)``, and writes the result of
	``atom.writeAtom()`` for each atom to '<input minus last 4 chars>_rb.pdb'.
	Prints a usage message and returns when fewer than three argv entries
	are present.

	Raises:
		KeyError: if ``getAAmap`` returns a value that is not a key of
			``scoreValue``.
	"""
	if len(sys.argv) < 3:
		print('resn2bfactor(): replace b factor values with residue type.')
		print('resn2bfactor(): used for pymol spectrum b')
		return
	# Integer score per residue code; note 'B' shares the value 3 with 'D'.
	scoreValue = {
		'X': 0, '-': 0, '.': 0, 'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5,
		'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11, 'N': 12, 'P': 13,
		'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20,
		'B': 3,
	}
	aamap = AAmap()

	pdbfile = sys.argv[2]
	p = protein(pdbfile)
	# Drops the last four characters of the input name before the suffix.
	outfile = '%s_rb.pdb' % pdbfile[:-4]
	# The context manager closes the output even if a lookup or write fails.
	with open(outfile, 'w') as fout:
		for a in p.atoms:
			aa = aamap.getAAmap(a.resName)
			newBFactor = scoreValue[aa]
			print('new b-factor: [%s : %s] -> %d' % (a.resName, aa, newBFactor))
			a.tempFactor = newBFactor
			fout.write(a.writeAtom())
	print('Output file: %s' % outfile)
示例#4
0
    def getSeq(self):
        """Build the residue sequence string and fill ``self.resDict``.

        Walks ``self.atoms`` in order; whenever an atom's ``resSeq`` differs
        from the previous atom's, the value of ``AAmap.getAAmap(resName)`` is
        appended to the sequence and ``self.resDict['<chainID><resSeq>']`` is
        set to ``(position, sequence[position])``.

        Returns:
            The accumulated sequence string.
        """
        aamap = AAmap()
        seq = ''
        # None never equals a residue number, so the first residue is always
        # emitted. A numeric sentinel such as -1 silently skips the first
        # residue of structures whose numbering starts at that value.
        last_resSeq = None
        seqPos = 0
        for a in self.atoms:
            if last_resSeq != a.resSeq:
                seq = seq + aamap.getAAmap(a.resName)
                last_resSeq = a.resSeq

                key = '%s%s' % (a.chainID, a.resSeq)
                self.resDict[key] = (seqPos, seq[seqPos])
                seqPos += 1
        return seq
示例#5
0
文件: protein.py 项目: jkjium/pyMAVEN
    def getSeq(self):
        """Collect the sequence, residue list and per-residue atom groups.

        Atoms in ``self.atoms`` are scanned in order and a new residue starts
        whenever ``resSeq`` differs from the previous atom's. For each new
        residue ``self.resDict['<chainID><resSeq>']`` is set to
        ``(position, sequence[position])``.

        Returns:
            A 3-tuple ``(sequence, residues, groups)``: the string built from
            ``AAmap.getAAmap(resName)`` of each residue, a list of
            ``(chainID, getAAmap(resName), resSeq)`` tuples, and a list of
            atom lists, one per residue.
        """
        mapper = AAmap()
        sequence = ''
        # Sentinel for "no residue seen yet". -1 is not usable because
        # 1a8v starts its residue numbering at -1.
        previous = -9999
        index = 0
        residues = []

        groups = []
        current = []
        for atom in self.atoms:
            if previous != atom.resSeq:
                sequence += mapper.getAAmap(atom.resName)
                previous = atom.resSeq

                self.resDict['%s%d' % (atom.chainID, atom.resSeq)] = (
                    index, sequence[index])
                index += 1

                residues.append(
                    (atom.chainID, mapper.getAAmap(atom.resName), atom.resSeq))

                # A residue boundary closes the group collected so far.
                if current:
                    groups.append(current)
                    current = []

            current.append(atom)

        # Only a resSeq change flushes a group inside the loop, so the final
        # residue's atoms are still pending here.
        if current:
            groups.append(current)

        return sequence, residues, groups
示例#6
0
    def writeFASTA(self):
        """Print the sequence in FASTA form and write it to ``self.pdb + '.fa'``.

        One entry is taken from ``AAmap.getAAmap(resName)`` each time an
        atom's ``resSeq`` differs from the previous atom's. The header line
        is '>' + self.pdb + '/1-' + number of entries.
        """
        fafile = self.pdb+'.fa'
        aamap = AAmap()

        residues = []
        # None never equals a residue number; a -1 sentinel would drop the
        # first residue of structures whose numbering starts at -1.
        last_resSeq = None
        for a in self.atoms:
            if last_resSeq != a.resSeq:
                residues.append(aamap.getAAmap(a.resName))
                last_resSeq = a.resSeq
        seq = ''.join(residues)+'\n'
        header = '>%s/1-%d\n' % (self.pdb, len(residues))
        print(header+seq)

        # The context manager closes the file even if the write fails.
        with open(fafile, 'w') as fp:
            fp.write(header+seq)
示例#7
0
文件: protein.py 项目: jkjium/pyMAVEN
    def writeFASTA(self):
        """Print the sequence as FASTA and save it to ``self.pdb + '.fa'``.

        ``self.atoms`` is scanned in order; every change of ``resSeq`` between
        consecutive atoms contributes ``AAmap.getAAmap(resName)`` to the
        sequence. The header is '>' + self.pdb + '/1-' + entry count.
        """
        fafile = self.pdb + '.fa'
        aamap = AAmap()

        letters = []
        # Start from None rather than -1: a structure whose first residue is
        # numbered -1 would otherwise lose that residue.
        last_resSeq = None
        for a in self.atoms:
            if last_resSeq != a.resSeq:
                letters.append(aamap.getAAmap(a.resName))
                last_resSeq = a.resSeq
        seq = ''.join(letters) + '\n'
        header = '>%s/1-%d\n' % (self.pdb, len(letters))
        print(header + seq)

        # `with` guarantees the handle is closed if the write raises.
        with open(fafile, 'w') as fp:
            fp.write(header + seq)
示例#8
0
def main():
	"""Cluster the atoms of a structure and write the clusters that pass the cutoff.

	Command line: ``python proc_dendrogram.py preffix cutoff``.

	Steps, each writing a file named after ``preffix``:
	  .resimap  -- index, '<chainID><resSeq>' and getAAmap(resName) per atom
	  .pdist    -- Euclidean distance for every atom pair i < j
	  .hcluster -- one line per row of the complete-linkage matrix
	  .hcg      -- '<preffix>,<writeLeaves(resimap)>' for every cluster id
	               whose entry in ``existdict`` is True

	Prints a usage line and returns when fewer than three argv entries
	are given.
	"""
	if len(sys.argv) < 3:
		print('python proc_dendrogram.py preffix cutoff')
		# A bare `exit` is only a name lookup and does not stop anything;
		# return so the code below never indexes missing arguments.
		return

	preffix = sys.argv[1]
	cutoff = float(sys.argv[2])
	# load tip pdb file
	pr = protein(preffix)
	aamap = AAmap()
	n = len(pr.atoms)

	resimap = {}
	print('writing %s.resimap ...' % (preffix))
	px = []
	with open(preffix+'.resimap', 'w') as fr:
		for count, a in enumerate(pr.atoms):
			px.append((a.x, a.y, a.z))
			resimap[count] = ('%s%d' % (a.chainID, a.resSeq), aamap.getAAmap(a.resName))
			fr.write('%d %s%d %s\n' % (count, a.chainID, a.resSeq, aamap.getAAmap(a.resName)))

	x = np.array(px)

	# calculate pairwise distance, keyed by 'i-j' with i < j
	pdist = {}
	print('writing %s.pdist ...' % (preffix))
	with open(preffix+'.pdist', 'w') as fo:
		for i in xrange(0, len(x)):
			for j in xrange(i+1, len(x)):
				dist = np.linalg.norm(x[i]-x[j])
				pdist['%d-%d' % (i, j)] = dist
				fo.write('%d-%d : %f\n' % (i, j, dist))

	# for hc extraction
	hcdict = {}     # cluster id -> hc object
	hclist = []     # hc objects in linkage-matrix row order
	existdict = {}  # cluster id -> result of the proximity checks below

	linkage_matrix = linkage(x, "complete")
	print('writing %s.hcluster ...' % (preffix))
	m = linkage_matrix
	with open(preffix+'.hcluster', 'w') as fo1:
		for i in xrange(0, len(m)):
			# Row i of the linkage matrix is given the cluster id n+i.
			hcline = '%d %d %d %f %d' % (n+i, m[i, 0], m[i, 1], m[i, 2], m[i, 3])
			fo1.write(hcline + '\n')
			h = hc(hcline, n)
			hcdict[h.clusterID] = h
			hclist.append(h)

	# resolve leaves for each cluster
	print('resolving leaves ...')
	for h in hclist:
		h.getChildren(hcdict)

	print('iterating clusters for largest proximity contact ...')
	# Register every single atom as its own one-leaf cluster.
	for i in xrange(0, n):
		leafstr = '%d %d %d 0.0 1' % (i, i, i)
		h = hc(leafstr, n)
		h.leaves = [i]
		hcdict[i] = h

	# add single leaf in
	for i in xrange(0, n):
		existdict[i] = True

	for h in hclist:
		if h.dist <= cutoff:
			if h.c1 in existdict and h.c2 in existdict: # both been checked before
				if existdict[h.c1] == True and existdict[h.c2] == True:
					ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
					existdict[h.clusterID] = ret
					if ret == True: # combine both and delete sub cluster in the dict
						existdict[h.c1] = False
						existdict[h.c2] = False
				elif existdict[h.c1] == False or existdict[h.c2] == False:
					existdict[h.clusterID] = False

			elif h.c1 in existdict and h.c2 not in existdict:
				if existdict[h.c1] == False: # c1 is not a contact; get h
					existdict[h.clusterID] = False
					existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff) # get c2
				elif existdict[h.c1] == True: # c1 is a contact; get c2 then get h = c1 and c2
					ret = checkProximity(hcdict[h.c2], pdist, cutoff) # get c2
					existdict[h.c2] = ret
					if ret == False:
						existdict[h.clusterID] = False
					elif ret == True: # h.c2 is a contact
						ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
						existdict[h.clusterID] = ret1
						if ret1 == True:
							existdict[h.c1] = False
							existdict[h.c2] = False

			elif h.c1 not in existdict and h.c2 in existdict:
				if existdict[h.c2] == False: # c2 is not a contact; get h
					existdict[h.clusterID] = False
					existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff) # get c1
				elif existdict[h.c2] == True: # c2 is a contact; get c1 then get h = c1 and c2
					ret = checkProximity(hcdict[h.c1], pdist, cutoff) # get c1
					existdict[h.c1] = ret
					if ret == False:
						existdict[h.clusterID] = False
					elif ret == True: # h.c1 is a contact
						ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
						existdict[h.clusterID] = ret1
						if ret1 == True:
							existdict[h.c1] = False
							existdict[h.c2] = False

			elif h.c1 not in existdict and h.c2 not in existdict:
				r1 = checkProximity(hcdict[h.c1], pdist, cutoff)
				existdict[h.c1] = r1
				r2 = checkProximity(hcdict[h.c2], pdist, cutoff)
				existdict[h.c2] = r2
				if r1 == False or r2 == False:
					existdict[h.clusterID] = False
				elif r1 == True and r2 == True:
					# NOTE(review): unlike the branches above, this one never
					# stores ret in existdict[h.clusterID] -- confirm whether
					# that is intentional before changing it.
					ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
					if ret == True:
						existdict[h.c1] = False
						existdict[h.c2] = False

		elif h.dist > cutoff:
			# The merge itself is rejected; children not yet evaluated are
			# checked on their own.
			existdict[h.clusterID] = False
			if h.c1 not in existdict:
				existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)
			if h.c2 not in existdict:
				existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)

	# print out the result
	print('writing result into %s.hcg' % preffix)
	count = 0
	with open(preffix+'.hcg', 'w') as fout:
		for hid in existdict:
			if existdict[hid] == True:
				fout.write('%s,%s\n' % (preffix, hcdict[hid].writeLeaves(resimap)))
				count += len(hcdict[hid].leaves)
	print('%d leaves in total\n' % count)
示例#9
0
 def writeSPDB(self):
     """Dump every atom as one text line into ``self.pdbfile + '.spdb'``.

     A line consists of the atom's x, y, z, its ``resSeq`` and the value of
     ``AAmap.getAAmap(resName)``, separated by single spaces.
     """
     aamap = AAmap()
     # Using `with` ensures the handle is closed when a write or a format
     # conversion raises in the middle of the loop.
     with open(self.pdbfile+'.spdb', 'w') as fo:
         for a in self.atoms:
             fo.write('%f %f %f %d %s\n' % (a.x, a.y, a.z, a.resSeq, aamap.getAAmap(a.resName)))