Пример #1
0
 def writeSPDB(self):
     """Write one line per atom to ``<self.pdbfile>.spdb``.

     Each line has the form ``x y z resSeq letter``, where ``letter`` is
     whatever ``AAmap.getAAmap`` returns for the atom's ``resName``.
     """
     aamap = AAmap()
     # The context manager closes the file even if formatting or the
     # residue-name lookup raises part-way through the atom list.
     with open(self.pdbfile + '.spdb', 'w') as fo:
         for a in self.atoms:
             fo.write('%f %f %f %d %s\n' %
                      (a.x, a.y, a.z, a.resSeq, aamap.getAAmap(a.resName)))
Пример #2
0
    def getSeq(self):
        """Return the residue sequence and fill ``self.resDict``.

        Atoms are scanned in order and a new residue starts whenever
        ``resSeq`` differs from the previous atom's.  For each residue,
        ``self.resDict['<chainID><resSeq>']`` is set to
        ``(index in sequence, letter)``.

        Returns:
            The concatenation of the letters returned by
            ``AAmap.getAAmap`` for each residue.
        """
        aamap = AAmap()
        letters = []
        # None instead of -1 as the "no residue seen yet" marker: with -1 a
        # structure whose first residue is numbered -1 lost that residue.
        last_resSeq = None
        for a in self.atoms:
            if a.resSeq == last_resSeq:
                continue
            last_resSeq = a.resSeq

            # assumes getAAmap yields exactly one character per residue, so
            # the list index equals the position in the joined string
            letter = aamap.getAAmap(a.resName)
            key = '%s%s' % (a.chainID, a.resSeq)
            self.resDict[key] = (len(letters), letter)
            letters.append(letter)
        return ''.join(letters)
Пример #3
0
    def filterClusters(self):
        """Build one neighbour cluster per atom and keep the compact ones.

        For every atom ``i`` a ``cluster`` is created with the atom itself
        as first member.  Every atom ``j`` whose pairwise distance to ``i``
        is at most ``self.cutoff`` and whose index differs from ``i`` by at
        least ``self.scutoff`` is added as a neighbour, together with the
        result of ``self.calculateThetaPhi`` for the pair.  Clusters with
        fewer than ``self.nbcutoff`` neighbours are dropped.  A remaining
        cluster is printed as a comma-separated line and appended to
        ``self.clusters`` when ``self.clusterMeanDist`` is below 5.8.

        ``self.pairwise()`` is called first if ``self.pairwiseDict`` is
        still empty.
        """
        if not self.pairwiseDict:
            self.pairwise()
        amap = AAmap()

        for i, center_atom in enumerate(self.atoms):
            c = cluster(self.pdb, self.top, self.pfam, '', '', self.seqheader,
                        '', '', self.center, self.cutoff, self.scutoff,
                        self.flag, 1.0, self.desc)
            c.addNeighbor(amap, center_atom, i)  # put itself in first
            nbnum = 0
            for j, other in enumerate(self.atoms):
                key = "%d-%d" % (i, j)
                # distance test first, as before: a missing key still raises
                if (self.pairwiseDict[key] <= self.cutoff
                        and abs(i - j) >= self.scutoff):
                    c.addNeighbor(amap, other, j)
                    nbnum += 1
                    c.thetaPhi.append(
                        self.calculateThetaPhi(center_atom, other))
            if nbnum < self.nbcutoff:
                continue

            # original note: stripping pdbidx "will change meanDist"
            c.pdbidx = c.pdbidx.lstrip()
            c.pdbResSeq = c.pdbResSeq.lstrip()
            meanDist = self.clusterMeanDist(c)
            if meanDist < 5.8:
                # Format inside the call: `print(fmt) % args` only works as a
                # Python 2 print statement and raises TypeError when print
                # is a function.
                print('%s,%0.2f,%s,%s,%s,%s' % (
                    self.pdb, meanDist, ''.join(sorted(c.str)),
                    ''.join(sorted(c.typeStr)), c.pdbResSeq,
                    self.getSphericalStr(c)))
                self.clusters.append(c)
Пример #4
0
    def writeFASTA(self):
        """Print the sequence as a FASTA record and save it to ``<self.pdb>.fa``.

        One letter is emitted each time ``resSeq`` changes along
        ``self.atoms``.  The header is ``><self.pdb>/1-<residue count>``.
        The same text (header line plus sequence line) is printed and
        written to the file.
        """
        fafile = self.pdb + '.fa'
        aamap = AAmap()

        letters = []
        # None instead of -1 as the "no residue yet" marker, so a structure
        # whose first residue is numbered -1 does not lose that residue.
        last_resSeq = None
        for a in self.atoms:
            if a.resSeq != last_resSeq:
                letters.append(aamap.getAAmap(a.resName))
                last_resSeq = a.resSeq
        seq = ''.join(letters) + '\n'
        header = '>%s/1-%d\n' % (self.pdb, len(letters))
        # single-argument call form prints the same text under Python 2 and 3
        print(header + seq)

        with open(fafile, 'w') as fp:
            fp.write(header + seq)
Пример #5
0
    def writeFASTA(self):
        """Emit the residue sequence in FASTA format.

        Walks ``self.atoms`` and takes one letter from ``AAmap.getAAmap``
        whenever ``resSeq`` changes.  The record, consisting of the header
        ``><self.pdb>/1-<number of residues>`` and the sequence line, is
        printed and also written to ``<self.pdb>.fa``.
        """
        fafile = self.pdb + '.fa'
        aamap = AAmap()

        residues = []
        # A None marker cannot collide with a real residue number; the old
        # -1 marker silently skipped a first residue numbered -1.
        last_resSeq = None
        for a in self.atoms:
            if last_resSeq != a.resSeq:
                residues.append(aamap.getAAmap(a.resName))
                last_resSeq = a.resSeq
        seq = ''.join(residues) + '\n'
        header = '>%s/1-%d\n' % (self.pdb, len(residues))
        # call form of print gives identical output on Python 2 and 3
        print(header + seq)

        # `with` guarantees the handle is closed even if the write fails
        with open(fafile, 'w') as fp:
            fp.write(header + seq)
Пример #6
0
	def __init__(self, nafile):
		"""Load per-residue records from the file ``nafile``.

		Lines starting with ``RES`` are parsed with ``rsa`` and stored in
		``self.rsaDict`` under ``<letter><chain><resi>``.  ``self.resiDict``
		accumulates, per ``<letter><self.accessible(key)>``, a
		space-terminated list of ``<chain><resi>`` entries.  A line starting
		with ``TOTAL`` is stored, whitespace-split, under
		``self.rsaDict['TOTAL']``.

		NOTE(review): the original ``TOTAL`` branch could never run (it
		compared a 3-character slice to ``'TOTAL'`` and wrote to the
		misspelled ``self.rasDict``).  Now that it runs, ``rsaDict`` may hold
		a list under ``'TOTAL'`` next to the ``rsa`` objects; confirm no
		caller assumes every value is an ``rsa``.
		"""
		self.pdb = nafile[0:4]
		self.rsaDict = {}
		self.resiDict = defaultdict(str)
		self.alphabet = ['B', 'E']
		aamap = AAmap()

		# read everything up front so the handle is closed before parsing
		with open(nafile) as fin:
			lines = [line.strip() for line in fin]

		for naline in lines:
			if naline.startswith('RES'):
				r = rsa(naline)
				letter = aamap.getAAmap(r.resn)
				key = '%s%s%s' % (letter, r.chain, r.resi)
				# stored before self.accessible(key) is called, as before
				self.rsaDict[key] = r

				varkey = '%s%s' % (letter, self.accessible(key))
				self.resiDict[varkey] = '%s%s%s ' % (
					self.resiDict[varkey], r.chain, r.resi)
			elif naline.startswith('TOTAL'):
				self.rsaDict['TOTAL'] = naline.split()
Пример #7
0
def resn2bfactor():
	"""Rewrite a PDB file with B-factors replaced by a residue-type code.

	Reads the PDB path from ``sys.argv[2]``, loads it with ``protein`` and
	writes ``<path without last 4 chars>_rb.pdb``.  Each atom's
	``tempFactor`` is set to the integer that ``scoreValue`` assigns to the
	letter returned by ``AAmap.getAAmap`` for its ``resName``.  Prints a
	usage hint and returns when fewer than three argv entries are present.

	Raises:
		KeyError: if ``getAAmap`` returns a letter missing from
			``scoreValue``.
	"""
	if len(sys.argv) < 3:
		print('resn2bfactor(): replace b factor values with residue type.')
		print('resn2bfactor(): used for pymol spectrum b')
		return
	scoreValue = {
		'X': 0, '-': 0, '.': 0, 'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5,
		'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11, 'N': 12, 'P': 13,
		'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20,
		'B': 3,
	}
	aamap = AAmap()

	pdbfile = sys.argv[2]
	p = protein(pdbfile)
	outfile = '%s_rb.pdb' % pdbfile[:-4]
	# `with` closes the output even when a lookup below raises
	with open(outfile, 'w') as fout:
		for a in p.atoms:
			letter = aamap.getAAmap(a.resName)
			newBFactor = scoreValue[letter]
			print('new b-factor: [%s : %s] -> %d' % (a.resName, letter, newBFactor))
			a.tempFactor = newBFactor
			fout.write(a.writeAtom())
	print('Output file: %s' % outfile)
0
    def getSeq(self):
        """Derive sequence, residue keys and per-residue atom lists.

        ``self.atoms`` is walked once; a change in ``resSeq`` marks the
        start of a new residue.  As a side effect
        ``self.resDict['<chainID><resSeq>']`` is set to
        ``(sequence index, letter)`` for every residue.

        Returns:
            A 3-tuple ``(seq, resArray, resAtomsAll)``: the sequence
            string, a list of ``(chainID, letter, resSeq)`` tuples, and a
            list of atom lists grouped in scan order.
        """
        aamap = AAmap()
        seq = ''
        resArray = []
        resAtomsAll = []

        # -9999 rather than -1 marks "nothing seen yet": 1a8v's first
        # residue is numbered -1.
        previous = -9999
        group = None
        for atm in self.atoms:
            starts_residue = atm.resSeq != previous
            if starts_residue:
                # resArray grows by one per residue, so its length is the
                # position of the letter about to be appended
                position = len(resArray)
                seq += aamap.getAAmap(atm.resName)
                previous = atm.resSeq

                self.resDict['%s%d' % (atm.chainID, atm.resSeq)] = (
                    position, seq[position])
                resArray.append(
                    (atm.chainID, aamap.getAAmap(atm.resName), atm.resSeq))

            # open a fresh atom group at each residue boundary (and for the
            # very first atom, whatever its number)
            if starts_residue or group is None:
                group = []
                resAtomsAll.append(group)
            group.append(atm)

        return seq, resArray, resAtomsAll
Пример #9
0
def main():
	"""Cluster atoms hierarchically and report proximity-contact groups.

	Command line: ``python proc_dendrogram.py preffix cutoff``.

	``preffix`` is passed to ``protein`` to load the atoms.  Four files are
	written next to it:

	* ``<preffix>.resimap``  - atom index, ``<chainID><resSeq>``, letter
	* ``<preffix>.pdist``    - Euclidean distance for every pair ``i < j``
	* ``<preffix>.hcluster`` - one line per row of the complete-linkage
	  matrix, with cluster ids starting at the atom count
	* ``<preffix>.hcg``      - one line per cluster still flagged True in
	  the bookkeeping dict after the merge pass

	The merge pass walks the linkage rows in order.  A row whose distance
	is within ``cutoff`` is accepted when ``checkProximity2`` approves the
	pair of children, in which case the children are unflagged; all other
	rows are flagged False.
	"""
	if len(sys.argv) < 3:
		print('python proc_dendrogram.py preffix cutoff')
		# The original bare `exit` expression did nothing, so execution
		# carried on into the argv lookups and raised IndexError.
		return

	preffix = sys.argv[1]
	cutoff = float(sys.argv[2])
	# load tip pdb file
	pr = protein(preffix)
	aamap = AAmap()
	n = len(pr.atoms)

	resimap = {}
	px = []
	print('writing %s.resimap ...' % (preffix))
	with open(preffix + '.resimap', 'w') as fr:
		for count, a in enumerate(pr.atoms):
			px.append((a.x, a.y, a.z))
			resi = '%s%d' % (a.chainID, a.resSeq)
			letter = aamap.getAAmap(a.resName)
			resimap[count] = (resi, letter)
			fr.write('%d %s %s\n' % (count, resi, letter))

	x = np.array(px)

	# calculate pairwise distance
	pdist = {}
	print('writing %s.pdist ...' % (preffix))
	with open(preffix + '.pdist', 'w') as fo:
		for i in range(len(x)):
			for j in range(i + 1, len(x)):
				dist = np.linalg.norm(x[i] - x[j])
				key = '%d-%d' % (i, j)
				pdist[key] = dist
				fo.write('%s : %f\n' % (key, dist))

	# for hc extraction
	hcdict = {}
	hclist = []
	existdict = {}

	linkage_matrix = linkage(x, "complete")
	print('writing %s.hcluster ...' % (preffix))
	with open(preffix + '.hcluster', 'w') as fo1:
		for i, row in enumerate(linkage_matrix):
			hcline = '%d %d %d %f %d' % (n + i, row[0], row[1], row[2], row[3])
			fo1.write(hcline + '\n')
			h = hc(hcline, n)
			hcdict[h.clusterID] = h
			hclist.append(h)

	# resolve leaves for each cluster; this runs before the single-leaf
	# entries are added to hcdict below, as in the original order
	print('resolving leaves ...')
	for h in hclist:
		h.getChildren(hcdict)

	print('iterating clusters for largest proximity contact ...')
	for i in range(n):
		h = hc('%d %d %d 0.0 1' % (i, i, i), n)
		h.leaves = [i]
		hcdict[i] = h

	# add single leaf in
	for i in range(n):
		existdict[i] = True

	# NOTE: the explicit `== True` / `== False` comparisons are kept on
	# purpose; the return type of checkProximity/checkProximity2 is not
	# visible here, and truthiness tests would treat e.g. None differently.
	for h in hclist:
		if h.dist <= cutoff:
			if h.c1 in existdict and h.c2 in existdict:  # both been checked before
				if existdict[h.c1] == True and existdict[h.c2] == True:
					ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
					existdict[h.clusterID] = ret
					if ret == True:  # combine both and drop the sub clusters
						existdict[h.c1] = False
						existdict[h.c2] = False
				elif existdict[h.c1] == False or existdict[h.c2] == False:
					existdict[h.clusterID] = False

			elif h.c1 in existdict and h.c2 not in existdict:
				if existdict[h.c1] == False:  # c1 is not a contact; get h
					existdict[h.clusterID] = False
					existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)  # get c2
				elif existdict[h.c1] == True:  # c1 is a contact; get c2 then h = c1 and c2
					ret = checkProximity(hcdict[h.c2], pdist, cutoff)  # get c2
					existdict[h.c2] = ret
					if ret == False:
						existdict[h.clusterID] = False
					elif ret == True:  # h.c2 is a contact
						ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
						existdict[h.clusterID] = ret1
						if ret1 == True:
							existdict[h.c1] = False
							existdict[h.c2] = False

			elif h.c1 not in existdict and h.c2 in existdict:
				if existdict[h.c2] == False:  # c2 is not a contact; get h
					existdict[h.clusterID] = False
					existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)  # get c1
				elif existdict[h.c2] == True:  # c2 is a contact; get c1 then h = c1 and c2
					ret = checkProximity(hcdict[h.c1], pdist, cutoff)  # get c1
					existdict[h.c1] = ret
					if ret == False:
						existdict[h.clusterID] = False
					elif ret == True:  # h.c1 is a contact
						ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
						existdict[h.clusterID] = ret1
						if ret1 == True:
							existdict[h.c1] = False
							existdict[h.c2] = False

			elif h.c1 not in existdict and h.c2 not in existdict:
				r1 = checkProximity(hcdict[h.c1], pdist, cutoff)
				existdict[h.c1] = r1
				r2 = checkProximity(hcdict[h.c2], pdist, cutoff)
				existdict[h.c2] = r2
				if r1 == False or r2 == False:
					existdict[h.clusterID] = False
				elif r1 == True and r2 == True:
					ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
					# record the merged cluster's own flag, as every other
					# branch does; the original left it unset here
					existdict[h.clusterID] = ret
					if ret == True:
						existdict[h.c1] = False
						existdict[h.c2] = False

		elif h.dist > cutoff:
			existdict[h.clusterID] = False
			if h.c1 not in existdict:
				existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)
			if h.c2 not in existdict:
				existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)

	# print out the result
	print('writing result into %s.hcg' % preffix)
	count = 0
	# the original never closed this file; `with` flushes and closes it
	with open(preffix + '.hcg', 'w') as fout:
		for hid in existdict:
			if existdict[hid] == True:
				fout.write('%s,%s\n' % (preffix, hcdict[hid].writeLeaves(resimap)))
				count += len(hcdict[hid].leaves)
	print('%d leaves in total\n' % count)
Пример #10
0
 def writeSPDB(self):
     """Dump atom coordinates to ``<self.pdbfile>.spdb``.

     One line is written per entry of ``self.atoms``:
     ``x y z resSeq letter``, with ``letter`` taken from
     ``AAmap.getAAmap(resName)``.
     """
     aamap = AAmap()
     # `with` releases the handle even if a write or lookup fails midway
     with open(self.pdbfile + '.spdb', 'w') as fo:
         for a in self.atoms:
             letter = aamap.getAAmap(a.resName)
             fo.write('%f %f %f %d %s\n' % (a.x, a.y, a.z, a.resSeq, letter))
Пример #11
0
    def __init__(self,
                 pdbname,
                 chain='all',
                 top='',
                 pfam='',
                 center='CA',
                 cutoff=5,
                 scutoff=1,
                 flag=0,
                 desc='',
                 nbcutoff=4):
        """Load ATOM records from the PDB file ``pdbname``.

        Args:
            pdbname: path of the file to read; ``self.pdb`` is this path
                without its last four characters.
            chain: ``'all'`` to keep every chain, otherwise only lines whose
                column 21 equals this value are kept.
            top, pfam, center, cutoff, scutoff, flag, desc, nbcutoff:
                stored unchanged on the instance.

        Reading stops at the first line whose first six characters contain
        ``END``, so only one model is loaded.  Lines whose residue name
        (columns 17-19) is not in ``AAmap().AAA2A`` are skipped.  An atom
        repeating the previous kept atom's name and ``resSeq`` is skipped
        (consecutive alternate locations).  Atoms named ``CA`` are also
        collected in ``self.ca``.

        After loading, ``self.getSeq()`` provides ``self.seq``,
        ``self.resArray`` and ``self.resAtoms`` and fills ``self.resDict``;
        ``self.seqDict`` is then built as the inverse index.
        """
        self.atoms = []
        # dictionary for pairwise distance
        self.pairwiseDict = {}
        self.clusters = []

        self.pdbfile = pdbname
        self.pdb = pdbname[:-4]
        self.chain = chain
        self.top = top
        self.pfam = pfam
        self.center = center
        self.cutoff = cutoff
        self.scutoff = scutoff
        self.seqheader = self.pdb
        self.flag = flag
        self.desc = desc

        self.nbcutoff = nbcutoff
        self.ca = []

        with open(pdbname, 'r') as fin:
            lines = fin.readlines()

        lastname = ''
        lastres = ''
        aamap = AAmap()
        for line in lines:
            # load only one model
            if 'END' in line[0:6]:
                break
            if line[17:20].strip() not in aamap.AAA2A:
                continue
            # Slice instead of index: a truncated line yields '' here rather
            # than raising IndexError, and is then filtered out.
            if self.chain != 'all' and self.chain != line[21:22]:
                continue
            if line[0:6] != 'ATOM  ':
                continue

            at = atom(line)
            if at.name == lastname and at.resSeq == lastres:
                # same atom name in the same residue as the previous kept
                # atom: treated as an alternate location and dropped
                continue
            self.atoms.append(at)
            if at.name.strip() == 'CA':
                self.ca.append(at)
            lastname = at.name
            lastres = at.resSeq

        # map for Chain+Resi : (index in sequence, ResName)
        # 'B529': (132, 'V')
        self.resDict = {}  # assigned in self.getSeq() function
        # resAtoms, a list of lists, each (element) list contains atoms of residues
        # resArray, gives a list of keys eg. (A,Q,70), (A,I,71), (A,V,72)
        self.seq, self.resArray, self.resAtoms = self.getSeq()

        # The sequence comes from getSeq() rather than from self.ca because
        # some residues have no CA (e.g. the last residue of 1e6i.aln.pdb).

        # map for sequence index: Chain+Resi(ResName)
        # 132 : 'B529(V)'
        self.seqDict = {-1: '.'}
        for key, (position, letter) in self.resDict.items():
            self.seqDict[position] = '%s(%s)' % (key, letter)