示例#1
0
文件: compare.py 项目: crosvera/ProDy
def matchAlign(mobile, target, **kwargs):
    """Superpose *mobile* onto *target* using their best matching chain pair.

    .. versionadded:: 0.7.1

    Chain matching is delegated to :func:`matchChains`; all keyword
    arguments are forwarded to it unchanged.  Only the first match that
    :func:`matchChains` returns is used.

    Returns ``None`` when no chains could be matched.  Otherwise returns a
    tuple made of:

      * *mobile* after the transformation has been applied to it,
      * matching chain from *mobile* as a :class:`~prody.atomic.AtomMap`
        instance,
      * matching chain from *target* as a :class:`~prody.atomic.AtomMap`
        instance,
      * percent sequence identity of the match,
      * percent sequence overlap of the match.

    """

    matches = matchChains(mobile, target, **kwargs)
    if not matches:
        return None

    best = matches[0]
    mobile_chain, target_chain = best[0], best[1]

    rmsd_before = prody.calcRMSD(mobile_chain, target_chain)
    LOGGER.info('RMSD before alignment (A): {0:.2f}'.format(rmsd_before))

    # The transformation is fitted on the matched chains only, but applied
    # to the whole *mobile* object.
    transformation = prody.calcTransformation(mobile_chain, target_chain)
    transformation.apply(mobile)

    rmsd_after = prody.calcRMSD(mobile_chain, target_chain)
    LOGGER.info('RMSD after alignment  (A): {0:.2f}'.format(rmsd_after))

    return (mobile,) + best
示例#2
0
def clusterize(sorted_ids, rmsd_cutoff=4.0):
    """Cluster the structures identified by the ids in *sorted_ids*.

    The first id seeds cluster 0.  Every following id is compared, in
    cluster creation order, against the first member (the representative)
    of each existing cluster and joins the first cluster whose
    representative lies within *rmsd_cutoff*; otherwise it starts a new
    cluster.

    Args:
        sorted_ids: Sequence of integer structure ids, in the order in which
            they should be considered.
        rmsd_cutoff: Maximum backbone RMSD (rounded to 4 decimals) to a
            cluster representative for a structure to join that cluster.
            Defaults to 4.0, the previously hard-coded value.

    Returns:
        dict mapping cluster id (0, 1, 2, ...) to the list of member ids,
        representative first.  An empty dict when *sorted_ids* is empty.
    """
    # Nothing to cluster: avoid the IndexError on sorted_ids[0].
    if len(sorted_ids) == 0:
        return {}

    clusters = {0: [sorted_ids[0]]}

    # Read all structures backbone atoms
    backbone_atoms = get_backbone_atoms(sorted_ids)

    for j in sorted_ids[1:]:
        log.info("Glowworm %d with pdb lightdock_%d.pdb" % (j, j))
        # Only the member lists are mutated inside this loop, never the
        # dict itself, so iterating over items() directly is safe.
        for cluster_id, members in clusters.items():
            representative_id = members[0]
            rmsd = calcRMSD(backbone_atoms[representative_id],
                            backbone_atoms[j]).round(4)
            log.info('RMSD between %d and %d is %5.3f' %
                     (representative_id, j, rmsd))
            if rmsd <= rmsd_cutoff:
                members.append(j)
                log.info("Glowworm %d goes into cluster %d" % (j, cluster_id))
                break
        else:
            # No representative was close enough: open a new cluster whose
            # id continues the 0, 1, 2, ... numbering.
            new_cluster_id = len(clusters)
            clusters[new_cluster_id] = [j]
            log.info("New cluster %d" % new_cluster_id)
    return clusters
示例#3
0
 def calc_rmsd_matrix_intra(self, align=False, sel='all'):
     """Compute RMSD values with every coordinate set used as active set in turn.

     Works on a copy of ``self.ag``.  For each coordinate set index the
     copy's active coordinate set is switched to that index, the selected
     atoms are optionally aligned with ``prody.alignCoordsets``, and
     ``prody.calcRMSD`` is evaluated on the selection.

     :param align: when true, align the selection before each RMSD call
     :param sel: ProDy selection string applied to the copied atom group
     :return: the per-index ``calcRMSD`` results joined with
         ``np.concatenate`` along the first axis
     """
     group = self.ag.copy()

     def _row(index):
         # Make `index` the active coordinate set before measuring.
         group.setACSIndex(index)
         if align:
             prody.alignCoordsets(group.select(sel))
         return [prody.calcRMSD(group.select(sel))]

     rows = [_row(index) for index in range(group.numCoordsets())]
     return np.concatenate(rows)
示例#4
0
def compare_pdb_files(file1, file2):
    """Compute the RMSD between two PDB files after superposing the first on the second.

    Args:
        file1 (str): Path to the first PDB file; its structure is the one
            that gets transformed.
        file2 (str): Path to the second PDB file. Must be the same protein
            as in file1.

    Returns:
        float: Root Mean Squared Deviation (RMSD) between the transformed
        first structure and the second structure.
    """
    mobile, reference = (pr.parsePDB(path) for path in (file1, file2))
    fit = pr.calcTransformation(mobile, reference)
    return pr.calcRMSD(fit.apply(mobile), reference)
示例#5
0
def prody_align(opt):
    """Align models in a PDB file or a PDB file onto others.

    *opt* is an options object that provides:

      * ``opt.pdb`` -- list of PDB identifiers/filenames.  It is read but
        never modified by this function.
      * ``opt.select``, ``opt.prefix``, ``opt.model`` -- selection string,
        output filename prefix and 1-based reference model index; these are
        only read when a single PDB is given.

    With exactly one PDB, all of its coordinate sets are aligned onto the
    model ``opt.model`` using the selected atoms and the result is written
    to ``<prefix>.pdb`` (``<title>_aligned.pdb`` when the prefix is empty).
    If the selection matches no atoms the process exits with status -1.

    Otherwise the first PDB is the reference and every other PDB is aligned
    onto it with ``prody.matchAlign`` and written to ``<title>_aligned.pdb``.
    Arguments equal to the reference name or containing ``_aligned.pdb``
    are skipped.
    """

    # Imported locally so the function does not depend on a module-level
    # ``import sys`` being present.
    import sys

    import prody
    LOGGER = prody.LOGGER

    # Work on a copy: popping the reference below must not alter opt.pdb.
    args = list(opt.pdb)
    if len(args) == 1:
        pdb = args[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        selstr, prefix, model = opt.select, opt.prefix, opt.model
        pdb = prody.parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warning('Selection "{0:s}" do not match any atoms.'
                           .format(selstr))
            sys.exit(-1)
        LOGGER.info('{0:d} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        # opt.model is 1-based, coordinate set indices are 0-based.
        pdb.setACSIndex(model - 1)
        prody.alignCoordsets(pdb, selstr=selstr)
        rmsd = prody.calcRMSD(pdb)
        LOGGER.info('Max RMSD: {0:0.2f} Mean RMSD: {1:0.2f}'
                    .format(rmsd.max(), rmsd.mean()))
        if prefix == '':
            prefix = pdb.getTitle() + '_aligned'
        outfn = prefix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        prody.writePDB(outfn, pdb)
    else:
        reffn = args.pop(0)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = prody.parsePDB(reffn)
        for arg in args:
            # Skip the reference itself and outputs of a previous run.
            if arg == reffn:
                continue
            if '_aligned.pdb' in arg:
                continue
            pdb = prody.parsePDB(arg)
            if prody.matchAlign(pdb, ref):
                outfn = pdb.getTitle() + '_aligned.pdb'
                LOGGER.info('Writing file: ' + outfn)
                prody.writePDB(outfn, pdb)
            else:
                LOGGER.warning('Failed to align ' + arg)
示例#6
0
    def calc_rmsd_with(self, mol, align=False, sel='all'):
        """Compute RMSD values between this object's coordinate sets and those of *mol*.

        The selected atoms of both objects are copied, their coordinate
        sets are stacked (this object's first, then *mol*'s) and, for each
        of this object's coordinate sets used as the active one,
        ``prody.calcRMSD`` is evaluated and the entries belonging to *mol*
        are kept.

        :param mol: object exposing an ``ag`` atom group, like ``self``
        :param align: when true, run ``prody.alignCoordsets`` on the stacked
            coordinate sets before each RMSD evaluation
        :param sel: ProDy selection string applied to both atom groups
        :return: the per-reference RMSD rows joined with ``np.concatenate``
            (presumably one row per coordinate set of ``self`` and one
            column per coordinate set of *mol* -- confirm against ProDy)
        :raises RuntimeError: if the selection matches no atoms in either
            object, or if the two selections differ in atom count
        """
        ag1 = self.ag.copy()
        ag2 = mol.ag.copy()

        # select() yields None when nothing matches, so test before copying;
        # calling .copy() first would fail with AttributeError instead of
        # the intended RuntimeError.
        picked1 = ag1.select(sel)
        picked2 = ag2.select(sel)
        if picked1 is None or picked2 is None:
            raise RuntimeError('Selection is empty')
        sel1 = picked1.copy()
        sel2 = picked2.copy()
        if sel1.numAtoms() != sel2.numAtoms():
            raise RuntimeError('Selections are different')

        merged = np.concatenate([sel1.getCoordsets(), sel2.getCoordsets()])
        # Read the count before sel1's coordinates are replaced by the stack.
        n1 = sel1.numCoordsets()
        sel1.setCoords(merged)
        rmsd = []
        for i in range(n1):
            sel1.setACSIndex(i)
            if align:
                prody.alignCoordsets(sel1)
            # Entries from index n1 onwards correspond to mol's coordinate sets.
            rmsd.append([prody.calcRMSD(sel1)[n1:]])
        rmsd = np.concatenate(rmsd)
        return rmsd
示例#7
0
def corepagecalculation(pdbfilename, selatom, noma1, nummodes, gamcut, cut1, gam2, cut2, showresults, smodes, snmd, smodel, scollec, massnomass, sample1, modeens, confens, rmsdens, traverse1, modetra, steptra, rmsdtra, modelnumber, caanm, cagnm, nohanm, nohgnm, allanm, allgnm, bbanm, bbgnm, scanm, scgnm, nmdfolder, modesfolder, collectivityfolder, modelnewname, nmdnewname, modesnewname, modesendname, collectivitynewname, collectivityendname, samplenewname, traversenewname, crosscorr=0, corrfolder='', corrname='', corrend='', compmode01='7', compmode02='15', sqflucts=0, sqfluctsfolder='', sqfluctsname='', sqfluctsend='', separatevar1='0', temfac=0, temfacfolder='', temfacname='', temfacend='', fracovar=0, fraconame='', fracoend='', ovlap=0, ovlapfold='', ovlapname='', ovlapend='', ovlaptab=0, ovlaptabname='', ovlaptabend='', comppdbfilename=''):
	"""Run a normal mode analysis on a PDB file and write the requested outputs.

	This is Python 2 code (print statements, ``Tkinter``/``tkMessageBox``).

	The function parses ``pdbfilename`` with ProDy, selects atoms according
	to ``selatom``, builds a GNM (Kirchhoff matrix) or ANM (Hessian matrix)
	according to ``noma1``, calculates ``nummodes`` modes and then writes the
	outputs that were switched on.  Progress is printed and also shown in a
	small Tkinter window that is destroyed at the end.  Output files are
	written below ``<directory of pdbfilename>/<folder>/`` where ``folder``
	is one of the ``ca*/noh*/all*/bb*/sc*`` arguments.

	Main arguments (all values are used exactly as compared in the code):

	* ``selatom``: "C-alpha", "Heavy", "All", "Backbone" or "Sidechain".
	* ``noma1``: "Gaussian Normal Mode" or "Anisotropic Normal Mode".
	* ``gamcut``: '0' uses ``gammaDistanceDependent`` (not defined in this
	  function) with cutoff ``cut1``; '1' uses ``gam2`` with cutoff ``cut2``.
	* ``smodel``, ``snmd``, ``smodes``, ``scollec``, ``sample1``,
	  ``traverse1``, ``crosscorr``, ``sqflucts``, ``temfac``, ``fracovar``,
	  ``ovlap``, ``ovlaptab``: an output is produced when the value == 1.
	* ``showresults``: '1' opens some written files with ``gnome-open``.
	* ``massnomass``: '0' writes the mass-weighted collectivity table first,
	  '1' writes the unweighted table first.
	* ``modeens``: space/comma separated 1-based mode numbers to sample;
	  '1c'..'4c' refer to the most collective modes (needs ``scollec``==1).
	* ``modetra``: 1-based mode number to traverse, or 'c' for the most
	  collective mode (needs ``scollec``==1).
	* ``compmode01``/``compmode02``: 1-based mode range used by the
	  cross-correlation, square-fluctuation and overlap outputs.
	* ``comppdbfilename``: second PDB file used for the overlap outputs.
	* ``*folder``/``*name``/``*end`` arguments: sub-folder, filename suffix
	  and file extension of the corresponding output.

	Returns a 3-tuple ``(elapsed time message, model filename or 'nofile',
	most collective mode number as a string or 'nocoll')``.
	"""
# modelnumber
	import prody
	import time
	import os
	import Tkinter
	# Progress window: one label per step, stacked in a single column.
	root=Tkinter.Tk()
	root.title('Info')
	onlypage=Tkinter.Frame(root)
	onlypage.pack(side='top')
	Tkinter.Label(onlypage,text='File: '+pdbfilename).grid(row=0,column=0,sticky='w')
	Tkinter.Label(onlypage,text='Atoms: '+selatom).grid(row=1,column=0,sticky='w')
	Tkinter.Label(onlypage,text='Analysis: '+noma1).grid(row=2,column=0,sticky='w')
	# Load ~/.noma/savefile.txt into the module-level global ``savedfile``
	# and drop the last character of every line (presumably the newline).
	path=os.path.join(os.path.expanduser('~'),'.noma/')
	fin = open(path+'savefile.txt','r')
	global savedfile
	savedfile=fin.readlines()
	fin.close()
	i=0
	a=len(savedfile)
	while i<a:
		savedfile[i]=savedfile[i][:-1]
		i+=1
	# Show the gamma/cutoff settings; for gamcut '0' the displayed exponent
	# is taken from savedfile[91].
	if gamcut=='0':
		Tkinter.Label(onlypage,text='Gamma: r^'+savedfile[91]).grid(row=3,column=0,sticky='w')
		Tkinter.Label(onlypage,text='Cutoff: '+cut1).grid(row=4,column=0,sticky='w')
	elif gamcut=='1':
		Tkinter.Label(onlypage,text='Gamma: '+gam2).grid(row=3,column=0,sticky='w')
		Tkinter.Label(onlypage,text='Cutoff: '+cut2).grid(row=4,column=0,sticky='w')



	# Split pdbfilename into a directory prefix (bgn) and a base name without
	# extension (name).  First scan backwards for the last '/'; bgn becomes
	# the index just after it.
	find = 0					#
	while find < len(pdbfilename):			#
		if pdbfilename[-(find+1):-find] == '/':	#
			bgn = len(pdbfilename)-find		#
			break				#
		else:					# helps in the
			find +=1			# saving of files
	# bgn is unbound when no '/' was found; the resulting UnboundLocalError
	# is a NameError subclass, so it is caught here.
	try:						#
		float(bgn)				#
	except (NameError):				#
		bgn = 0					#
	# Then scan forwards from bgn for the first '.'; ``end`` is the length of
	# the tail starting at that dot.
	find = 0					#
	while bgn+find<len(pdbfilename):			#
		if pdbfilename[bgn+find:bgn+find+1] == '.':	#
			end = len(pdbfilename)-(bgn+find)	#
			break				#
		else:					#
			find +=1			#
	# Same unbound-variable trick: no '.' found means no extension to strip.
	try:						#
		name = pdbfilename[bgn:-end]			#
	except (NameError):				#
		name = pdbfilename[bgn:len(pdbfilename)]		# name of the file
	bgn = pdbfilename[:bgn]				# path for file
	mytimeis = time.asctime(time.localtime(time.time()))
	start = time.time()
	# Parse the requested model.  Any failure (bare except) shows a dialog
	# and then retries without the model argument; that retry is not guarded.
	try:
		p38 = prody.parsePDB(pdbfilename,model=int(modelnumber))
	except:
		import tkMessageBox
		tkMessageBox.askokcancel("File Error","""This is not the correct path or name. Try entering /some/path/nameoffile.pdb
If you need help finding the path, open a new terminal and enter:
find -name 'filename.pdb'        use the output as the pdb input
If this doesn't work, make sure the file is in PDB format.""")
		p38 = prody.parsePDB(pdbfilename)
	print 'Submitted: '+pdbfilename+' at '+mytimeis
	Tkinter.Label(onlypage,text='Submitted at: '+mytimeis).grid(row=5,column=0,sticky='w')
	root.update()
	# Pick the output folder and the atom selection from the
	# (selatom, noma1) combination.  If no branch matches, ``folder`` and
	# ``pro`` stay unbound and the code below fails.
	if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode":
		folder = cagnm+'/'
		pro = p38.select('protein and name CA')	# selects only carbon alphas
	elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode":
		folder = caanm+'/'
		pro = p38.select('protein and name CA')
	elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode":
		folder = nohgnm+'/'
		pro = p38.select('protein and not name "[1-9]?H.*"') # gets rid of all Hydrogens
	elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode":
		folder = nohanm+'/'
		pro = p38.select('protein and not name "[1-9]?H.*"')
	elif selatom == "All" and noma1 == "Gaussian Normal Mode":
		folder = allgnm+'/'
		pro = p38.select('protein')
	elif selatom == "All" and noma1 == "Anisotropic Normal Mode":
		folder = allanm+'/'
		pro = p38.select('protein')
	elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode":
		folder = bbgnm+'/'
		pro = p38.select('protein and name CA C O N H')	# selects backbone
	elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode":
		folder = bbanm+'/'
		pro = p38.select('protein and name CA C O N H')	# selects backbone
	elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode":
		folder = scgnm+'/'
		pro = p38.select('protein and not name CA C O N H')	# selects sidechain
	elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode":
		folder = scanm+'/'
		pro = p38.select('protein and not name CA C O N H')	# selects sidechain
	# Make sure the output folder exists.  ``mer = 0`` here and in the
	# similar handlers below only serves as a do-nothing handler body; mer is
	# reassigned before every later read.
	# NOTE(review): open() on an existing directory presumably raises IOError
	# as well, in which case makedirs raises OSError and is ignored -- confirm.
	try:							#
		open(bgn+folder)				# creates the folders
	except (IOError):					# where the files will
		try:						# be saved only if they
			os.makedirs(bgn+folder)			# are not there
		except (OSError):				#
			mer = 0					#
	# Build the model.  The variable is named ``anm`` for both GNM and ANM.
	# ``brat`` (2 for GNM, 7 for ANM) is later used as the lowest mode number
	# accepted when looking for the most collective mode -- presumably to
	# skip the trivial zero modes; confirm.
	if noma1 == "Gaussian Normal Mode":
		print 'Building the Kirchhoff matrix'
		Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=6,column=0,sticky='w')
		root.update()
		anm = prody.GNM(name)###
		if gamcut=='0':
			anm.buildKirchhoff(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)###
			# NOTE(review): re-sets the matrix that was just built; purpose is
			# not evident from this function.
			anm.setKirchhoff(anm.getKirchhoff())
		elif gamcut=='1':
			anm.buildKirchhoff(pro,cutoff=float(cut2),gamma=float(gam2))###
		brat = 2
	elif noma1 == "Anisotropic Normal Mode":
		print 'Building the Hessian matrix'
		Tkinter.Label(onlypage,text='Building Hessian').grid(row=6,column=0,sticky='w')
		root.update()
		anm = prody.ANM(name)###
		if gamcut=='0':
			anm.buildHessian(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)###
			anm.setHessian(anm.getHessian())###
		elif gamcut=='1':
			anm.buildHessian(pro,cutoff=float(cut2),gamma=float(gam2))###
		brat = 7
	print 'Calculating modes'
	Tkinter.Label(onlypage,text='Calculating modes').grid(row=7,column=0,sticky='w')
	root.update()
	# zeros=True presumably keeps the zero-eigenvalue modes -- confirm
	# against the ProDy version in use.
	anm.calcModes(int(nummodes),zeros = True)###
	numatom=anm.numAtoms()###
	eigval=anm.getEigvals()###
	atomname=pro.getNames()###
	# --- Save the model (smodel) ---
	if smodel==1:
		# modelfilename is only returned to the caller; saveModel receives
		# the name without the extension.
		if brat==2:
			modelfilename=bgn+folder+name+modelnewname+'.gnm.npz'
		elif brat==7:
			modelfilename=bgn+folder+name+modelnewname+'.anm.npz'
		print 'Saving Model'
		Tkinter.Label(onlypage,text='Saving Model').grid(row=8,column=0,sticky='w')
		root.update()
		# First try with the third argument True; on any failure save again
		# without it (the messages indicate the matrix is then left out).
		try:
			prody.saveModel(anm,bgn+folder+name+modelnewname,True)###
		except:
			print 'Matrix not saved due to size'
			Tkinter.Label(onlypage,text='Matrix not saved').grid(row=8,column=0,sticky='w')
			root.update()
			prody.saveModel(anm,bgn+folder+name+modelnewname)###
	# --- Save an NMD file (snmd) ---
	if snmd==1:
		print 'Saving NMD'
		Tkinter.Label(onlypage,text='Saving NMD').grid(row=9,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+nmdfolder+'/')		#
		except (OSError):				#
			mer = 0					#
		prody.writeNMD(bgn+folder+nmdfolder+'/'+name+nmdnewname+'.nmd',anm[:len(eigval)],pro)###	# this can be viewed in VMD
	# --- Save the eigenvectors as text (smodes) ---
	if smodes==1:
		print 'Saving Modes'
		Tkinter.Label(onlypage,text='Saving Modes').grid(row=10,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+modesfolder+'/')	#
		except (OSError):				#
			mer = 0					#
		modefile = bgn+folder+modesfolder+'/'+name+modesnewname+'.'+modesendname
		fout = open(modefile,'w')
		# mer is the 0-based mode index; tq walks the atoms and tt/ttt/tttt
		# walk the three components of each atom in the ANM eigenvector.
		mer = 0
		while mer< len(eigval):
			slowest_mode = anm[mer]###
			r = slowest_mode.getEigvec()###
			p = slowest_mode.getEigval()###
			tq = 0
			tt = 0
			ttt = 1
			tttt = 2
			fout.write('MODE {0:3d}		{1:15e}'.format(mer+1,p))
			fout.write("""
-------------------------------------------------
""")
			if noma1 == "Gaussian Normal Mode":
				# GNM: one eigenvector entry per atom.
				while tq < numatom:
					fout.write("""{0:4s}{1:15e}
""".format(atomname[tq],r[tq]))
					tq +=1
			elif noma1 == "Anisotropic Normal Mode":
				# ANM: three consecutive eigenvector entries per atom.
				while tt < numatom*3:
					fout.write("""{0:4s}{1:15e}{2:15e}{3:15e}
""".format(atomname[tq],r[tt],r[ttt],r[tttt]))
					tq+=1
					tt +=3
					ttt+=3
					tttt+=3
			mer +=1
		fout.close()
		if showresults=='1':
			os.system('/usr/bin/gnome-open '+modefile)
	# --- Collectivity, mass-weighted (computed by hand) and unweighted
	# (prody.calcCollectivity) (scollec) ---
	if scollec==1:
		print 'Saving collectivity'
		Tkinter.Label(onlypage,text='Saving collectivity').grid(row=11,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+collectivityfolder+'/')	#
		except (OSError):				#
			mer = 0					#
		mer = 0
		xx = [0]*(numatom) # sets the array to zero and other initial conditions
		i = 0
		aa = 0
		no = 0
		var3 = 0
		# sss collects (mass-weighted collectivity, 1-based mode number) tuples.
		sss = [0]*(len(eigval))
		while mer< len(eigval):
			slowest_mode = anm[mer]###
			r = slowest_mode.getEigvec()###
			p = slowest_mode.getEigval()###
			a = 0
			tt = 0
			ttt = 1
			tttt = 2
			while a < numatom:
				atom = atomname[a]
				# Guess the mass m from the atom name: the first character is
				# tested against N/H/C/O/S/P, then the second character; if
				# neither matches (or there is no second character) m = 1 and
				# a single warning is printed (``no`` limits it to one).
				mass = 0
				while mass < 2:
					if atom[mass] == "N": # all nitrogen
						m = 14.0067
						break
					elif atom[mass] == 'H': # all hydrogen
						m = 1.00794
						break
					elif atom[mass] == "C" : # all carbon
						m = 12.0107
						break
					elif atom[mass] == "O" : # all oxygen
						m = 15.9994
						break
					elif atom[mass] == 'S': # all sulfur
						m = 32.065
						break
					elif atom[mass] == 'P' : # all phosphorus
						m = 30.973762
						break
					else:
						if mass == 0:
							mass +=1
							try:
								atom[mass]
							except (IndexError):
								m = 1
								if no == 0:
									print 'Enter atom '+atom+' in to the system. Its mass was set to 1 in this simulation.'
									no +=1
								break
						else:
							m = 1
							if no == 0:
								print 'Enter atom '+atom+' in to the system. Its mass was set to 1 in this simulation'
								no +=1
							break
				# xx[i] is the squared displacement of atom i divided by its
				# mass: three components per atom when the eigenvector has
				# 3*numatom entries, otherwise one.
				if len(r)/numatom == 3:
					xx[i] = (r[tt]**2 + r[ttt]**2 + r[tttt]**2)/m
					i +=1
					tt +=3
					ttt+=3
					tttt+=3
				else:
					xx[i] = (r[tt]**2)/m
					i +=1
					tt +=1
				a +=1
			# With q_j = xx[j]/sum(xx), accumulate var3 = sum(q_j*log(q_j))
			# and store k = exp(-var3)/numatom for this mode.
			var3 = 0
			j = 0
			loop = 1
			while loop == 1:
				# NOTE(review): this branch leaves mer and i unchanged, so the
				# outer loop would process the same mode again with i past the
				# end of xx -- looks unintended; confirm.
				if sum(xx) == 0: # need this because you can't divide by 0
					loop = 0
				elif j <(numatom):
					var1 = xx[j]/sum(xx)
					if var1 == 0:
						var2 = 0
					elif var1 != 0:
						from math import log # this means natural log
						var2 = var1* log(var1)
					var3 += var2
					j +=1
				else:
					from math import exp
					k = exp(-var3)/numatom
					sss[aa] = k, aa+1
					aa +=1
					mer +=1
					loop = 0
					i = 0
					xx = [0]*(numatom)  # goes through all this until the big loop is done
		# k is reused: list of (unweighted collectivity, 1-based mode number).
		a = 0
		k=[0]*(len(eigval))
		while a < len(eigval):
			k[a]=prody.calcCollectivity(anm[a]),a+1
			a +=1


		# Write both tables, most collective first.  Each row is produced by
		# slicing str() of the (value, mode) tuple: [-3:-1] gives the mode
		# number, [1:19] the leading characters of the value.
		collectivefile = bgn+folder+collectivityfolder+'/'+name+collectivitynewname+'.'+collectivityendname
		fout = open(collectivefile,'w')
		if massnomass=='0':
			fout.write('MODE      COLLECTIVITY(mass)')
			fout.write("""
---------------------------
""")
			for h in sorted(sss,reverse=True):
				fout.write(str(h)[-3:-1]+'        '+str(h)[1:19]+"""
""")
			fout.write("""

MODE      COLLECTIVITY(without mass)""")
			fout.write("""
---------------------------
""")
			for hh in sorted(k,reverse=True):
				fout.write(str(hh)[-3:-1]+'        '+str(hh)[1:19]+"""
""")
		elif massnomass=='1':
			fout.write('MODE      COLLECTIVITY(without mass)')
			fout.write("""
---------------------------
""")
			for hh in sorted(k,reverse=True):
				fout.write(str(hh)[-3:-1]+'        '+str(hh)[1:19]+"""
""")
			fout.write("""

MODE      COLLECTIVITY(mass)""")
			fout.write("""
---------------------------
""")
			for h in sorted(sss,reverse=True):
				fout.write(str(h)[-3:-1]+'        '+str(h)[1:19]+"""
""")
		fout.close()
		if showresults=='1':
			os.system('/usr/bin/gnome-open '+collectivefile)

		# Read the file back and scan the first table (data starts at line
		# index 2) for mode numbers >= brat; prut/secoll/thicoll/foucoll are
		# meant to hold the four most collective such modes, as strings.
		# NOTE(review): hi0 is only advanced when a line is rejected, so after
		# the first accepted line the same line is read again and all four
		# variables receive the same mode number -- looks unintended; confirm.
		fin = open(collectivefile,'r')
		lst = fin.readlines()
		hi0 = 2
		looop = 1
		prut=0
		secoll=0
		thicoll=0
		while looop == 1:
			fine = lst[hi0]
			if int(fine[0:2]) >= brat:
				if prut==0:
					prut=fine[0:2]
				elif secoll==0:
					secoll=fine[0:2]
				elif thicoll==0:
					thicoll=fine[0:2]
				else:
					foucoll=fine[0:2]
					looop = 0
			else:
				hi0 +=1
		mostcollective= "Mode "+prut+" is the most collective."
		Tkinter.Label(onlypage,text='Mode '+prut+' is the most collective').grid(row=12,column=0,sticky='w')
		root.update()
		print mostcollective
		fin.close()

	# --- Sample conformations along selected modes (sample1) ---
	if sample1 == 1:
		print 'Saving sample file'
		Tkinter.Label(onlypage,text='Saving sample file').grid(row=13,column=0,sticky='w')
		root.update()
		# Parse modeens (tokens separated by ' ' or ',') into b, a list of
		# 0-based mode indices.  Tokens that are not integers are checked for
		# '1c'..'4c', which expand to the 1..4 most collective modes found
		# above (prut, secoll, thicoll, foucoll exist only if scollec == 1).
		a = modeens+' '
		b = [0]*(len(a)+1)
		i = 0
		j = 0
		b1 = 0
		while i < len(a):
			if a[i:i+1] ==' ' or a[i:i+1]==',':
				try:
					b[b1]=int(a[j:i])-1
				except:
					if '1c' in a[j:i]:
						b[b1]=int(prut)-1
					elif '2c' in a[j:i]:
						b[b1]=int(prut)-1
						b1 +=1
						b[b1]=int(secoll)-1
					elif '3c' in a[j:i]:
						b[b1]=int(prut)-1
						b1 +=1
						b[b1]=int(secoll)-1
						b1 +=1
						b[b1]=int(thicoll)-1
					elif '4c' in a[j:i]:
						b[b1]=int(prut)-1
						b1 +=1
						b[b1]=int(secoll)-1
						b1 +=1
						b[b1]=int(thicoll)-1
						b1+=1
						b[b1]=int(foucoll)-1
				j = i+1
				i +=1
				b1 +=1
			else:
				i +=1
		# Drop the unused tail of the pre-sized list.
		del b[b1:]
		ensemble = prody.sampleModes(anm[b],pro, n_confs=int(confens), rmsd =float(rmsdens))
		# Replace the coordinates of a copy of the selection with the sampled
		# conformations and write them as one PDB file.
		p38ens=pro.copy()
		p38ens.delCoordset(0)
		p38ens.addCoordset(ensemble.getCoordsets())
		prody.writePDB(bgn+folder+name+samplenewname+'.pdb',p38ens)


	# --- Traverse a single mode (traverse1) ---
	if traverse1 ==1:
		print 'Saving traverse file'
		Tkinter.Label(onlypage,text='Saving traverse file').grid(row=14,column=0,sticky='w')
		root.update()
		# 'c' selects the most collective mode (prut exists only if
		# scollec == 1); otherwise modetra is a 1-based mode number.
		if modetra=='c':
			modefortra=int(prut)-1
		else:
			modefortra=int(modetra)-1
		trajectory=prody.traverseMode(anm[modefortra],pro,n_steps=int(steptra),rmsd=float(rmsdtra))
		# The result of this expression is not used.
		prody.calcRMSD(trajectory).round(2)
		p38traj=pro.copy()
		p38traj.delCoordset(0)
		p38traj.addCoordset(trajectory.getCoordsets())
		prody.writePDB(bgn+folder+name+'_mode'+str(modefortra+1)+traversenewname+'.pdb',p38traj)
	# --- Cross-correlations, one file per mode (crosscorr) ---
	if crosscorr==1:
		print 'Saving cross correlation'
		Tkinter.Label(onlypage,text='Saving cross-correlation').grid(row=15,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+corrfolder+'/')	#
		except (OSError):				#
			mer = 0
		# Modes compmode01..compmode02 inclusive (1-based).
		# NOTE(review): the square-fluctuation and overlap loops below use
		# ``<`` and therefore stop one mode earlier -- confirm which is meant.
		i=int(compmode01)
		while i <= int(compmode02):
			x=i-1
			correlationdataname=bgn+folder+corrfolder+'/'+name+corrname+'_mode'+str(x+1)+'.'+corrend
			prody.writeArray(correlationdataname,prody.calcCrossCorr(anm[x]),'%.18e')
			print correlationdataname
			i+=1

##
	# --- Square fluctuations, one file per mode (sqflucts) ---
	if sqflucts==1:
		print 'Saving square fluctuation'
		Tkinter.Label(onlypage,text='Saving square fluctuation').grid(row=16,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+sqfluctsfolder+'/')	#
		except (OSError):				#
			mer = 0
		# Modes compmode01..compmode02-1 (1-based, upper bound exclusive).
		i=int(compmode01)
		while i < int(compmode02):
			yelp = i-1
			sqfluctdataname = bgn+folder+sqfluctsfolder+'/'+name+sqfluctsname+'_mode'+str(yelp+1)+'.'+sqfluctsend
			fout = open(sqfluctdataname,'w')
			if separatevar1=='0':
				# One "<0-based atom index><TAB><value>" line per atom;
				# calcSqFlucts is re-evaluated for every atom.
				a = 0
				while a < numatom:
					fout.write(str(a))
					fout.write("""	""")
					fout.write(str(prody.calcSqFlucts(anm[yelp])[a]))
					fout.write("""
""")
					a +=1
			elif separatevar1=='1':
				# Atoms are written in groups of int(numatom/numChains); each
				# group is numbered starting from the first residue number of
				# p38 and terminated by a '&' line.
				a=0
				while a <numatom:
					firstresnum=int(p38.getResnums()[0:1][0])
					origiresnum=int(p38.getResnums()[0:1][0])
					while firstresnum<(int(numatom*1.0/p38.numChains())+origiresnum):
						fout.write(str(firstresnum))
						fout.write('\t')
						fout.write(str(prody.calcSqFlucts(anm[yelp])[a]))
						fout.write('\n')
						a+=1
						firstresnum+=1
					fout.write('&\n')
			fout.close()
			print sqfluctdataname
			i+=1
	# --- Temperature factors (temfac) ---
	if temfac==1:
		print 'Saving temperature factors'
		Tkinter.Label(onlypage,text='Saving temperature factors').grid(row=17,column=0,sticky='w')
		root.update()
		try:						#
			os.makedirs(bgn+folder+temfacfolder+'/')	#
		except (OSError):				#
			mer = 0

		# Re-read the PDB text: for lines containing 'ATOM  ', when the third
		# whitespace-separated field equals the next expected atom name, the
		# second field is stored in d (presumably the atom serial number).
		fin=open(pdbfilename,'r')
		d = [None]*len(atomname)
		e = 0
		for line in fin:
			pair = line.split()
			if 'ATOM  ' in line and e < len(atomname):
				if str(pair[2]) == str(atomname[e]):
					d[e]=str(pair[1])
					e+=1
				else:
					e+=0
			else:
				continue
		fin.close()
		# x: square fluctuations divided by their Euclidean norm;
		# y: prody.calcTempFactors for the selection.
		sqf = prody.calcSqFlucts(anm)
		x = sqf/((sqf**2).sum()**.5)
		y = prody.calcTempFactors(anm,pro)
		a = 0
		tempfactorsdataname =bgn+folder+temfacfolder+'/'+name+temfacname+'.'+temfacend
		fout=open(tempfactorsdataname,'w')
		fout.write("""Atom	Residue	      TempFactor   TempFactor with exp beta
""")
		# The second column is the running index a+1.
		while a < numatom:
			fout.write("""{0:4s}	{1:4d}	{2:15f}	{3:15f}
""".format(d[a],a+1,x[a],y[a]))
			a +=1
		fout.close()
		print tempfactorsdataname
	# --- Fraction of variance plot, saved in the modes folder (fracovar) ---
	if fracovar==1:
		# Any exception in this section (including a missing matplotlib) is
		# caught by the bare except and reported only as a generic error.
		try:
			import matplotlib.pyplot as plt
			print 'Saving Fraction of Variance'
			Tkinter.Label(onlypage,text='Saving Fraction of Variance').grid(row=18,column=0,sticky='w')
			root.update()
			try:						#
				os.makedirs(bgn+folder+modesfolder+'/')	#
			except (OSError):				#
				mer = 0					#
			plt.figure(figsize = (5,4))
			prody.showFractVars(anm)
			prody.showCumulFractVars(anm)
			fracvardataname =bgn+folder+modesfolder+'/'+name+fraconame+'.'+fracoend
			plt.savefig(fracvardataname)
			print fracvardataname
			if showresults=='1':
				os.system('/usr/bin/gnome-open '+fracvardataname)
		except:
			print 'Error: Fraction of Variance'
			Tkinter.Label(onlypage,text='Error: Fraction of Variance').grid(row=18,column=0,sticky='w')
			root.update()
			mer=0

	# --- Overlap with the modes of a second structure (ovlap / ovlaptab) ---
	if ovlap==1 or ovlaptab==1:
		# The whole section runs inside one try with a bare except that
		# silently discards every error (see the end of the section).
		try:
			import matplotlib.pyplot as plt
			print 'Saving Overlap'
			Tkinter.Label(onlypage,text='Saving Overlap').grid(row=19,column=0,sticky='w')
			root.update()


			Tkinter.Label(onlypage,text='Comparison: '+comppdbfilename).grid(row=20,column=0,sticky='w')


##
			# Same directory/base-name splitting as for pdbfilename above,
			# applied to comppdbfilename (bgn1, end1, name1).
			find = 0
			while find < len(comppdbfilename):
				if comppdbfilename[-(find+1):-find] == '/':
					bgn1 = len(comppdbfilename)-find
					break
				else:
					find +=1
			try:
				float(bgn1)
			except (NameError):
				bgn1 = 0
			find = 0
			while bgn1+find<len(comppdbfilename):
				if comppdbfilename[bgn1+find:bgn1+find+1] == '.':
					end1 = len(comppdbfilename)-(bgn1+find)
					break
				else:
					find +=1
			try:
				name1 = comppdbfilename[bgn1:-end1]
			except (NameError):
				name1 = comppdbfilename[bgn1:len(comppdbfilename)]
			bgn1 = comppdbfilename[:bgn1]
			# Build the comparison model with the same selection, model type
			# and parameters as the primary one.
			p381 = prody.parsePDB(comppdbfilename,model=int(modelnumber))
			if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode":
				pro1 = p381.select('protein and name CA')
			elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode":
				pro1 = p381.select('protein and name CA')
			elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode":
				pro1 = p381.select('protein and not name "[1-9]?H.*"')
			elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode":
				pro1 = p381.select('protein and not name "[1-9]?H.*"')
			elif selatom == "All" and noma1 == "Gaussian Normal Mode":
				pro1 = p381.select('protein')
			elif selatom == "All" and noma1 == "Anisotropic Normal Mode":
				pro1 = p381.select('protein')
			elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode":
				pro1 = p381.select('protein and name CA C O N H')
			elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode":
				pro1 = p381.select('protein and name CA C O N H')
			elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode":
				pro1 = p381.select('protein and not name CA C O N H')
			elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode":
				pro1 = p381.select('protein and not name CA C O N H')
			if noma1 == "Gaussian Normal Mode":
				print 'Building the Kirchhoff matrix'
				Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=21,column=0,sticky='w')
				root.update()
				anm1 = prody.GNM(name1)
				if gamcut=='0':
					anm1.buildKirchhoff(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent)
					anm1.setKirchhoff(anm1.getKirchhoff())
				elif gamcut=='1':
					anm1.buildKirchhoff(pro1,cutoff=float(cut2),gamma=float(gam2))
				brat = 2
			elif noma1 == "Anisotropic Normal Mode":
				print 'Building the Hessian matrix'
				Tkinter.Label(onlypage,text='Building Hessian').grid(row=21,column=0,sticky='w')
				root.update()
				anm1 = prody.ANM(name1)
				if gamcut=='0':
					anm1.buildHessian(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent)
					anm1.setHessian(anm1.getHessian())
				elif gamcut=='1':
					anm1.buildHessian(pro1,cutoff=float(cut2),gamma=float(gam2))
				brat = 7
			print 'Calculating modes'
			Tkinter.Label(onlypage,text='Calculating modes').grid(row=22,column=0,sticky='w')
			root.update()
			anm1.calcModes(int(nummodes),zeros = True)
##
			# Outputs go below the primary file's folder (bgn+folder).
			try:
				os.makedirs(bgn+folder+ovlapfold+'/')
			except (OSError):
				mer = 0
			if ovlap==1:
				# One figure per primary mode compmode01..compmode02-1
				# (1-based, upper bound exclusive).
				i=int(compmode01)
				while i < int(compmode02):
					a = i-1
					plt.figure(figsize=(5,4))
					prody.showCumulOverlap(anm[a],anm1)
					prody.showOverlap(anm[a],anm1)
					plt.title('Overlap with Mode '+str(a+1)+' from '+name)
					plt.xlabel(name1+' mode index')
					overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlapname+'_mode'+str(a+1)+'.'+ovlapend
					plt.savefig(overlapname)
					print overlapname
					i+=1
			if ovlaptab==1:
				# Overlap table restricted to the compmode01..compmode02
				# window on both axes.
				plt.figure(figsize=(5,4))
				prody.showOverlapTable(anm1,anm)
				plt.xlim(int(compmode01)-1,int(compmode02))
				plt.ylim(int(compmode01)-1,int(compmode02))
				plt.title(name1+' vs '+name+' Overlap')
				plt.ylabel(name1)
				plt.xlabel(name)
				overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlaptabname+'.'+ovlaptabend
				plt.savefig(overlapname)
				print overlapname
		except:
			mer=0


	# Close the progress window and report the elapsed wall-clock time in
	# seconds, minutes or hours depending on its magnitude.
	root.destroy()
	mynewtimeis = float(time.time()-start)
	if mynewtimeis <= 60.00:
		timeittook= "The calculations took %.2f s."%(mynewtimeis)
	elif mynewtimeis > 60.00 and mynewtimeis <= 3600.00:
		timeittook= "The calculations took %.2f min."%((mynewtimeis/60.00))
	else:
		timeittook= "The calculations took %.2f hrs."%((mynewtimeis/3600.00))
	print timeittook
	# Placeholders 'nofile' / 'nocoll' stand in for outputs that were not
	# requested.
	if smodel==1 and scollec==1:
		return (timeittook,modelfilename,str(int(prut)))
	elif scollec==1:
		return (timeittook,'nofile',str(int(prut)))
	elif smodel==1:
		return (timeittook,modelfilename,'nocoll')
	else:
		return (timeittook,'nofile','nocoll')
示例#8
0
 def calc_rmsd_to_frame(self, frame, align=False, sel='all'):
     """Compute RMSD values with coordinate set *frame* as the active one.

     Works on a copy of ``self.ag`` whose active coordinate set index is
     set to *frame*.

     :param frame: coordinate set index to make active
     :param align: when true, run ``prody.alignCoordsets`` on the selection
         before measuring
     :param sel: ProDy selection string applied to the copied atom group
     :return: whatever ``prody.calcRMSD`` returns for the selection
     """
     group = self.ag.copy()
     group.setACSIndex(frame)
     if not align:
         return prody.calcRMSD(group.select(sel))
     prody.alignCoordsets(group.select(sel))
     # The selection is taken again after the alignment, as a fresh object.
     return prody.calcRMSD(group.select(sel))
示例#9
0
def calc(i, j):
    """Return ``(rmsd, -rmsd**2)`` for *i* and *j*, with rmsd from ``prody.calcRMSD``."""
    rmsd = prody.calcRMSD(i, j)
    negated_square = -1 * rmsd**2
    return rmsd, negated_square
示例#10
0
 def _do_align(self):
     """Fit the prediction onto the native structure and record the RMSD.

     Stores the fitted transformation in ``self._transformation``, applies
     it to ``self._prediction`` and stores an ``RMSDAlignmentResult`` built
     from the resulting RMSD in ``self._align_results``.
     """
     prediction, native = self._prediction, self._native
     fit = prody.calcTransformation(prediction, native)
     self._transformation = fit
     fit.apply(prediction)
     self._align_results = RMSDAlignmentResult(
         prody.calcRMSD(native, prediction))
示例#11
0
def calc(i, j):
    """Superpose *j* onto *i* with ``prody.superpose`` and return their RMSD."""
    # superpose returns a pair; only its first item is needed here.
    moved, _ = prody.superpose(j, i)
    rmsd = prody.calcRMSD(i, moved)
    return rmsd
示例#12
0
def get_single_rmsd(reference, model):
    """Return the RMSD between *model* and *reference* after superposition.

    Both structures are reduced to the atoms matching
    ``'backbone or name OC2'``; the model selection is superposed onto the
    reference selection with ``prody.superpose`` before the RMSD is taken.
    """
    selection = 'backbone or name OC2'
    ref_backbone = reference.select(selection)
    mod_backbone = model.select(selection)

    prody.superpose(mod_backbone, ref_backbone)
    rmsd = prody.calcRMSD(mod_backbone, ref_backbone)
    return rmsd
示例#13
0
def find_rep_gene_iso_models(hi_res_iso_models, lo_res_iso_models,
                             rep_rmsd_cutoff):
    """Pick representative gene iso models from a pool of pdb iso models.

    A greedy pass tries to cover as much of the gene sequence as possible:
    high resolution models are considered first, then low resolution ones,
    each group ordered by decreasing sequence coverage (as reported by
    ``get_seq_range``).

    A model covering at least 20 residues becomes a representative when

    * no representative has been chosen yet, or
    * it covers at least 10 residues not covered by earlier
      representatives, or
    * ``prody.matchAlign`` (called with ``overlap=10``) followed by
      ``prody.calcRMSD`` does not place it within *rep_rmsd_cutoff* of any
      current representative, i.e. it has a distinct conformation.

    Args:
        hi_res_iso_models: PDB files of the high resolution models.
        lo_res_iso_models: PDB files of the low resolution models.
        rep_rmsd_cutoff: RMSD at or below which a model is considered
            redundant with an existing representative.

    Returns:
        list: the chosen model files, in the order they were selected.
    """
    min_length = 20
    num_new_residue_cutoff = 10
    rep_overlap_cutoff = 10

    def _by_coverage(models):
        # Tag each model with its sequence coverage, longest coverage first
        # (sorted() is stable, so ties keep their input order).
        tagged = [(model, get_seq_range(model)) for model in models]
        return sorted(tagged, key=lambda pair: -len(pair[1]))

    # start with large hi res models, end with small lo res models
    sorted_iso_models = (_by_coverage(hi_res_iso_models) +
                         _by_coverage(lo_res_iso_models))

    rep_gene_iso_models = []
    # Residues covered so far; kept as a set so that it is not rebuilt from
    # a list on every iteration.
    gene_coverage = set()

    for model_file, model_coverage in sorted_iso_models:
        # discard structures that have too few residues
        if len(model_coverage) < min_length:
            continue

        already_seen = set(model_coverage) & gene_coverage
        num_new_residues = len(model_coverage) - len(already_seen)

        if not rep_gene_iso_models:
            # the first acceptable model is always a representative
            is_representative = True
        elif num_new_residues >= num_new_residue_cutoff:
            is_representative = True
        else:
            # Not enough new residues: keep the model only if its
            # conformation differs from every current representative.
            model_struct = prody.parsePDB(model_file)
            is_representative = True
            for rep_gene_iso_model in rep_gene_iso_models:
                rep_struct = prody.parsePDB(rep_gene_iso_model)
                alignment = prody.matchAlign(model_struct,
                                             rep_struct,
                                             overlap=rep_overlap_cutoff)
                if alignment is not None:
                    rmsd = prody.calcRMSD(alignment[1], alignment[2])
                    if rmsd <= rep_rmsd_cutoff:
                        # we already have a representative for this segment
                        is_representative = False
                        break

        if is_representative:
            rep_gene_iso_models.append(model_file)
            gene_coverage.update(model_coverage)

    return rep_gene_iso_models
示例#14
0
def _dump_matched_pair(parsed, row, ifgatoms, vdmatoms, ifginfo, vdminfo,
                       transf, outpath):
    """Write the lookup residue pair described by *row* to *outpath*.

    The structure named by ``row['pdb']`` is parsed from the first database
    directory (falling back to the second one), reduced to the iFG and vdM
    residues given by the row, relabelled to chains ``Y``/``X`` with residue
    number 10, moved with *transf* and written with ``pr.writePDB``.

    :param parsed: query structure; only ``select(...)`` is called on it
    :param row: lookup row providing ``pdb``, ``chid_ifg``, ``resnum_ifg``,
        ``chid_vdm`` and ``resnum_vdm``
    :param ifgatoms: atom names of the iFG side
    :param vdmatoms: atom names of the vdM side
    :param ifginfo: indexable whose items 0 and 1 are the query iFG chain
        and residue number
    :param vdminfo: same as *ifginfo* for the vdM side
    :param transf: transformation handed to ``pr.applyTransformation``
    :param outpath: file name handed to ``pr.writePDB``
    :return: True when a file was written, False when the selections differ
        in length or any error occurred while selecting/writing
    """
    pdb_file = row['pdb'] + 'H.pdb'
    try:
        db_dir = '/home/gpu/Sophia/STcombs/20171118/database/reduce/'
        par = pr.parsePDB(db_dir + pdb_file)
    except Exception:
        # not in the first database: retry in the second location and let a
        # second failure propagate to the caller
        db_dir = '/home/gpu/Sophia/combs/st_wd/20180207_db_molprobity_biolassem/'
        par = pr.parsePDB(db_dir + pdb_file)

    ifgchid, ifgresnum = row['chid_ifg'], row['resnum_ifg']
    vdmchid, vdmresnum = row['chid_vdm'], row['resnum_vdm']
    # work on a copy so relabelling never touches the parsed structure
    printout = copy.deepcopy(par).select(
        '(chain {} and resnum {}) or (chain {} and resnum {})'.format(
            ifgchid, ifgresnum, vdmchid, vdmresnum))
    printout.select('chain {} and resnum {}'.format(
        ifgchid, ifgresnum)).setChids('Y')
    printout.select('chain {} and resnum {}'.format(
        vdmchid, vdmresnum)).setChids('X')
    printout.select('all').setResnums(10)

    try:  # skip the ones that have segment ids. will prob need to update this
        # for the newly combed stuff
        query_atoms = []
        lookup_atoms = []
        for atom in ifgatoms:
            query_atoms.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    ifginfo[0], ifginfo[1], atom)))
            lookup_atoms.append(
                printout.select(
                    'chain Y and resnum 10 and name {}'.format(atom)))
        for atom in vdmatoms:
            query_atoms.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    vdminfo[0], vdminfo[1], atom)))
            lookup_atoms.append(
                printout.select(
                    'chain X and resnum 10 and name {}'.format(atom)))

        integrin_interactamer = sum(query_atoms[1:], query_atoms[0])
        printout_interactamer = sum(lookup_atoms[1:], lookup_atoms[0])
        # explicit check instead of an assert, which is stripped under -O
        if len(integrin_interactamer) != len(printout_interactamer):
            return False

        pr.writePDB(outpath,
                    pr.applyTransformation(transf, printout_interactamer))
        return True
    except Exception:
        # best effort: report the problem and carry on with the next entry
        traceback.print_exc()
        return False


def score_interaction_and_dump(parsed, ifgresn, vdmresn, ifg_contact_atoms,
                               vdm_contact_atoms, method, targetresi, cutoff,
                               pdbix, pdbname):
    """Score one iFG/vdM contact against the lookup table and dump close hits.

    The interacting atoms are resolved with ``get_ifg_vdm``. Unless the iFG
    atom list is exactly ``['N', 'CA', 'C']`` or ``['CA', 'C', 'O']``, every
    3-element lookup entry whose index appears in the filtered lookup frame
    is superposed onto the query coordinates once per atom ordering returned
    by ``flip``; the lowest RMSD per entry is recorded. Nearest-neighbour
    and greedy-cluster statistics are printed, and at most 30 entries whose
    first (unflipped) ordering is the best fit and lies below *cutoff* are
    written as PDB files under ``./output_data/pdbfiles/``.

    :param parsed: query structure; ``select(...).getCoords()`` is used
        (NOTE(review): presumably a ProDy atom container - confirm)
    :param ifgresn: iFG residue name forwarded to ``get_ifg_vdm``
    :param vdmresn: vdM residue name forwarded to ``get_ifg_vdm``
    :param ifg_contact_atoms: forwarded to ``get_ifg_vdm``
    :param vdm_contact_atoms: forwarded to ``get_ifg_vdm``
    :param method: forwarded to ``get_ifg_vdm``
    :param targetresi: only used in the output file name
    :param cutoff: RMSD threshold; converted with ``float``
    :param pdbix: only used in the output file name
    :param pdbname: only used in the output file name
    :return: ``(ifg chain, ifg resnum, ifgresn, vdm chain, vdm resnum,
        vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics)``, or ``None``
        when the iFG atoms are one of the two backbone triplets above
    """
    cutoff = float(cutoff)
    ifgtype, vdmtype, ifginfo, vdminfo = get_ifg_vdm(parsed, ifgresn, vdmresn,
                                                     ifg_contact_atoms,
                                                     vdm_contact_atoms, method)

    # backbone-only iFGs are not scored
    if ifgtype[1] == ['N', 'CA', 'C'] or ifgtype[1] == ['CA', 'C', 'O']:
        return None

    ifgresn = constants.AAname_rev[ifgtype[0]]
    vdmresn = constants.AAname_rev[vdmtype[0]]
    ifgatoms = ifgtype[1]
    vdmatoms = vdmtype[1]

    # filter for only vdmresn vdms of ifgresn with ifgatoms
    # and vdmatoms directly involved in interactions
    num_all_vdms, lookupdf = filter_contact(ifgresn, vdmresn, ifgatoms,
                                            vdmatoms)
    query = []
    for atom in ifgatoms:
        query.append(
            parsed.select('chain {} and resnum {} and name {}'.format(
                ifginfo[0], ifginfo[1], atom)).getCoords()[0])
    for atom in vdmatoms:
        query.append(
            parsed.select('chain {} and resnum {} and name {}'.format(
                vdminfo[0], vdminfo[1], atom)).getCoords()[0])
    query = np.array(query)

    # NOTE: pickle executes arbitrary code on load; this path must only ever
    # point at trusted, locally generated lookup files.
    lookup_path = (
        '/home/gpu/Sophia/combs/st_wd/Lookups/refinedvdms/coords_of_{}.pkl'
        .format(ifgtype[0]))
    with open(lookup_path, 'rb') as lookup_file:
        lookupcoords = pkl.load(lookup_file)

    ifglists = flip(ifgatoms, ifgresn)
    vdmlists = flip(vdmatoms, vdmresn)
    rmsds = []
    num_atoms = len(query)
    coords_ls = [item for item in lookupcoords if item[0] in lookupdf.index]
    lookupatoms_to_clus = []
    counter = 0  # to keep count of how many pdbs are being output
    for item in coords_ls:
        if len(item) != 3:
            rmsds.append([int(item[0]), 100000])
            continue

        # one candidate fit per (iFG ordering, vdM ordering) combination;
        # the transformation is kept with its fit so the dump below uses
        # the one that belongs to the selected ordering
        candidates = []
        for ifgls in ifglists:
            for vdmls in vdmlists:
                lookupatoms = get_order_of_atoms(item, ifgresn, vdmresn,
                                                 ifgls, vdmls)
                moved, transf = pr.superpose(lookupatoms, query)
                candidates.append((moved, pr.calcRMSD(moved, query), transf))

        best_rmsd = min(fit[1] for fit in candidates)
        # item[0] is df index
        rmsds.append([item[0], best_rmsd])

        for which_ind, (moved, fit_rmsd, fit_transf) in enumerate(candidates):
            if fit_rmsd != best_rmsd:
                continue
            lookupatoms_to_clus.append(moved)
            # output pdb if low rmsd; which_ind == 0 ensures the rmsd is
            # below cutoff when not flipped, so no atom reordering is needed
            # when writing the structure
            if fit_rmsd < cutoff and counter < 30 and which_ind == 0:
                row = lookupdf.loc[item[0]]
                outpath = './output_data/pdbfiles/' + \
                    '{}_{}_{}_{}{}_{}{}_{}_{}'.format(
                        pdbix, pdbname, targetresi, ifginfo[1], ifgresn,
                        vdminfo[1], vdmresn, cutoff, row.name)
                if _dump_matched_pair(parsed, row, ifgatoms, vdmatoms,
                                      ifginfo, vdminfo, fit_transf, outpath):
                    counter += 1

    # count how many NNs the query intrxn has
    num_nn, norm_metrics = get_NN(lookupatoms_to_clus, num_atoms, rmsds,
                                  query, cutoff, num_all_vdms)
    print('num NN')
    print(num_nn)

    exp_list = norm_metrics[-1]
    print('======= FOR NEAREST NEIGHBORS ==========')
    print('avg with single')
    print(exp_list[0])
    print('avg without single')
    print(exp_list[1])
    print('median with single')
    print(exp_list[2])
    print('median without single')
    print(exp_list[3])

    # do greedy clustering
    D = make_pairwise_rmsd_mat(
        np.array(lookupatoms_to_clus).astype('float32'))
    D = make_square(D)
    adj_mat = make_adj_mat(D, 0.5)
    mems, centroids = greedy(adj_mat)
    cluster_sizes = [len(x) for x in mems]
    non_singleton_sizes = [size for size in cluster_sizes if size > 1]
    print('======= FOR GREEDY CLUS ==========')
    print('avg with singletons')
    print(np.mean(cluster_sizes))
    print('avg without singletons')
    print(np.mean(non_singleton_sizes))
    print('median with singletons')
    print(np.median(cluster_sizes))
    print('median without singletons')
    print(np.median(non_singleton_sizes))

    return ifginfo[0], ifginfo[1], ifgresn, vdminfo[0], vdminfo[1],\
        vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics
示例#15
0
def calcANMPathway(
    pdb_a,
    pdb_b,
    k=0.1,
    r_c=15,
    U0_a=0,
    U0_b=0,
    sa=0.8,
    sb=0.4,
    t_rmsd=0.1,
    tol=10 ** (-4),
    crit_rmsd=1,
    m=100,
    max_iter=100,
):
    """Compute a transition path between the conformations *pdb_a* and *pdb_b*.

    A transition structure is first located by interpolating between the two
    states and picking the frame where the two rows returned by
    ``calcMultiStateEnergy`` differ least; it is then refined iteratively.
    From that structure two descents are run, one toward each end state, and
    the frames are stitched into a single ensemble ordered A -> transition
    -> B.

    :param pdb_a: first end state; ``copy()``, ``getCoords()`` and
        ``setCoords()`` are used (NOTE(review): presumably ProDy atoms with
        the same atom count as *pdb_b* - confirm)
    :param pdb_b: second end state, same requirements as *pdb_a*
    :param k: forwarded to ``GNM.buildKirchhoff`` and ``calcMultiStateEnergy``
    :param r_c: cutoff forwarded to the same two calls
    :param U0_a: accepted for interface compatibility; not used
    :param U0_b: accepted for interface compatibility; not used
    :param sa: step scale for moves computed against state A
    :param sb: step scale for moves computed against state B
    :param t_rmsd: accepted for interface compatibility; not used
    :param tol: refinement stops once the energy difference is <= *tol*
    :param crit_rmsd: each descent stops once the RMSD to its end state is
        <= *crit_rmsd*
    :param m: number of interpolation frames per transition search
    :param max_iter: iteration cap for the refinement and for each descent
    :return: ``(path, pdb_trans)`` - the stitched ensemble and a copy of
        *pdb_a* carrying the transition-state coordinates
    """
    import numpy as np
    import prody as pd
    import scipy.spatial as sp

    def calc_dU(coords, coords_ref, cutoff=r_c, k=k):
        """Return per-atom x/y/z displacement terms of *coords* vs *coords_ref*."""
        gnm = pd.GNM()
        gnm.buildKirchhoff(coords_ref, cutoff, k)
        kirchhoff = gnm.getKirchhoff()
        # keep only the off-diagonal contact weights, as magnitudes
        np.fill_diagonal(kirchhoff, 0)
        kirchhoff = abs(kirchhoff)

        xi = coords[:, 0]
        yi = coords[:, 1]
        zi = coords[:, 2]
        xj = coords_ref[:, 0]
        yj = coords_ref[:, 1]
        zj = coords_ref[:, 2]

        xi, xj = np.meshgrid(xi, xj)
        yi, yj = np.meshgrid(yi, yj)
        zi, zj = np.meshgrid(zi, zj)
        mag = np.sqrt(np.square(xi - xj) + np.square(yi - yj) + np.square(zi - zj))
        # the diagonal is overwritten with -1 before mag is used as a divisor
        np.fill_diagonal(mag, -1)

        D = sp.distance.squareform(sp.distance.pdist(coords, metric="euclidean"))
        D0 = sp.distance.squareform(sp.distance.pdist(coords_ref, metric="euclidean"))

        dU = np.multiply(kirchhoff, D - D0)
        # NOTE(review): yields NaN when D equals D0 everywhere (max is 0)
        dU = dU / np.max(abs(dU))
        dUx = np.multiply(dU, np.divide(xi - xj, mag))
        dUy = np.multiply(dU, np.divide(yi - yj, mag))
        dUz = np.multiply(dU, np.divide(zi - zj, mag))

        dUx = np.sum(dUx, axis=1) / dU.shape[1]
        dUy = np.sum(dUy, axis=1) / dU.shape[1]
        dUz = np.sum(dUz, axis=1) / dU.shape[1]

        return dUx, dUy, dUz

    def findCuspStruct(pdb_a, pdb_b, ensemble_ref, m=m):
        """Return the interpolated frame with the smallest energy difference."""
        ensemble = pd.Ensemble()
        ensemble.setAtoms(pdb_a)
        ensemble.setCoords(pdb_a.getCoords())
        conf_i = pdb_a.copy()
        conf_f = pdb_b.copy()
        conf_f, T = pd.superpose(conf_f, conf_i)
        v = conf_f.getCoords() - conf_i.getCoords()
        # q runs 0 -> 1, so the frames run from conf_f back to conf_i
        for q in np.linspace(0, 1, m):
            ensemble.addCoordset(((1 - q) * v) + conf_i.getCoords())
        E_trans = calcMultiStateEnergy(ensemble, ensemble_ref, cutoff=r_c, k=k)
        E_trans = E_trans / np.max(E_trans)
        diff_E = abs(E_trans[0, :] - E_trans[1, :])
        ind_trans = np.argmin(diff_E)
        coords = ensemble[ind_trans].getCoords()
        return (coords, diff_E[ind_trans])

    def minimize(coords, coords_ref, s, cutoff=r_c, k=k, U0=None):
        """Return *coords* moved one step of scale *s* along -calc_dU."""
        dUx, dUy, dUz = calc_dU(coords, coords_ref, cutoff=cutoff, k=k)
        newcoords = np.zeros(coords.shape)
        newcoords[:, 0] = coords[:, 0] - np.multiply(s, dUx)
        newcoords[:, 1] = coords[:, 1] - np.multiply(s, dUy)
        newcoords[:, 2] = coords[:, 2] - np.multiply(s, dUz)
        return newcoords

    # Instantiate containers for data
    pdb_container_a = pdb_a.copy()
    pdb_container_b = pdb_b.copy()
    pdb_trans = pdb_a.copy()
    path_a = pd.Ensemble("Path from transition to state A")
    path_b = pd.Ensemble("Path from transition to state B")
    path = pd.Ensemble("Transition Path")
    path_a.setAtoms(pdb_a)
    path_b.setAtoms(pdb_b)
    path.setAtoms(pdb_trans)

    ensemble_ref = pd.Ensemble()
    ensemble_ref.setAtoms(pdb_a)
    ensemble_ref.addCoordset(pdb_a)
    ensemble_ref.addCoordset(pdb_b)

    # Interpolate coordinates
    print("Searching for initial transition state.")
    coords_trans_i, E_trans_i = findCuspStruct(pdb_container_a, pdb_container_b, ensemble_ref)

    # Search for transition state
    print("Minimizing transition state.")
    coords_trans_f = coords_trans_i
    E_trans_f = E_trans_i
    counter = 0
    while (counter < max_iter) and (E_trans_f > tol):
        counter += 1
        coords_trans_a = minimize(coords_trans_f, pdb_a.getCoords(), s=sa)
        coords_trans_b = minimize(coords_trans_f, pdb_b.getCoords(), s=sb)
        pdb_container_a.setCoords(coords_trans_a)
        pdb_container_b.setCoords(coords_trans_b)
        coords_trans_f, E_trans_f = findCuspStruct(pdb_container_a, pdb_container_b, ensemble_ref)
        print("\tBeginning iteration %d, dE=%f" % (counter, E_trans_f))
    pdb_trans.setCoords(coords_trans_f)

    # Find path from transition state to reference state A, using steepest descent
    print("Finding paths of steepest descent from transition state.")
    counter = 0
    rmsd = pd.calcRMSD(pdb_a.getCoords(), pdb_trans.getCoords())
    pdb_container_a.setCoords(pdb_trans)
    while (counter < max_iter) and (rmsd > crit_rmsd):
        counter += 1
        path_a.addCoordset(minimize(pdb_container_a.getCoords(), pdb_a.getCoords(), s=sa))
        pdb_container_a.setCoords(path_a[-1])
        rmsd = pd.calcRMSD(pdb_a.getCoords(), pdb_container_a.getCoords())
        print("RMSD (path A): %f" % rmsd)

    # Find path from transition state to reference state B, using steepest descent
    counter = 0
    rmsd = pd.calcRMSD(pdb_b.getCoords(), pdb_trans.getCoords())
    pdb_container_b.setCoords(pdb_trans)
    while (counter < max_iter) and (rmsd > crit_rmsd):
        counter += 1
        path_b.addCoordset(minimize(pdb_container_b.getCoords(), pdb_b.getCoords(), s=sb))
        pdb_container_b.setCoords(path_b[-1])
        rmsd = pd.calcRMSD(pdb_b.getCoords(), pdb_container_b.getCoords())
        print("RMSD (path B): %f" % rmsd)

    # Stitch together frames of path in proper order: path A is walked
    # backwards so the result runs from state A through the transition to B
    for i in reversed(range(len(path_a))):
        path.addCoordset(path_a[i].getCoords())
    path.addCoordset(pdb_trans.getCoords())
    for i in range(len(path_b)):
        path.addCoordset(path_b[i].getCoords())
    print("Transition path calculation complete!")

    return (path, pdb_trans)
示例#16
0
# Load the cached PCA model when present; otherwise build it from `ensemble`
# and cache it for the next run.
pca_FN = 'prody.pca.npz'
if os.path.exists(pca_FN):
  pca = prody.loadModel(pca_FN)
else:
  pca = prody.PCA()
  pca.buildCovariance(ensemble) # Build covariance matrix
  pca.calcModes() # Calculate modes
  # the '.pca.npz' suffix is stripped before the name is handed to saveModel
  # (NOTE(review): presumably saveModel appends it again - confirm)
  prody.saveModel(pca, filename=pca_FN[:-len('.pca.npz')])

if not os.path.isdir('figures'):
  os.makedirs('figures')

import matplotlib.pyplot as plt

# Check the same path the figure is saved to; testing 'rmsd.png' in the
# working directory never matched, so the plot was rebuilt on every run.
rmsd_png = 'figures/rmsd.png'
if not os.path.isfile(rmsd_png):
  rmsd = prody.calcRMSD(ensemble)
  plt.clf()
  plt.plot(rmsd)
  plt.xlabel('Conformation index')
  plt.ylabel('RMSD (A)')
  plt.title('RMSD %f (%f)'%(rmsd.mean(), rmsd.std()))
  plt.savefig(rmsd_png)

# NOTE(review): no savefig for this figure is visible here; if it is also
# written under 'figures/', this check needs the same path fix as above.
if not os.path.isfile('blastPCA.png'):
  pc_ind0 = 0
  pc_ind1 = 1
  xtal_projection = prody.calcProjection(ensemble, pca[:20], rmsd=False)

  plt.clf()
  plt.plot(xtal_projection[:,pc_ind0],xtal_projection[:,pc_ind1],'ks')
#  titles = ['%s%s'%(pdb_id,chain_id) for (pdb_id,chain_id) in chain_hits]
示例#17
0
def sidechains_rmsd_calculator(pdb_target,
                               pdb_reference,
                               res_file=False,
                               area=False,
                               write2report=False,
                               ligand_chain="L"):
    """
    Compute per-residue RMSDs between a target and a reference structure.

    Both structures are obtained from ``superimpose_backbones``. For every
    selected residue number the heavy-atom RMSD and the CA-CA RMSD between
    reference and target are computed, printed, and optionally written to a
    report. Residues whose RMSD cannot be computed are reported and skipped.

    :param pdb_target: problem pdb file
    :param pdb_reference: reference pdb file
    :param res_file: file read with ``read_selecteds_from_file`` to obtain
        the residue numbers; only used when *area* is not set
    :param area: distance around the ligand chain used to select residues;
        takes precedence over *res_file*
    :param write2report: if set, path of the tab-separated report to write
        (residue number, residue name, RMSD, CA distance, RMSD - CA distance)
    :param ligand_chain: name of the chain of the ligand
    :return: None; when neither *area* nor *res_file* is given a hint is
        printed and nothing is computed
    """
    target, reference = superimpose_backbones(pdb_target, pdb_reference)
    if area:
        print("Selection set of {} Amstrongs".format(area))
        selected_area_target = reference.select(
            "protein and (within {} of chain {})".format(area, ligand_chain))
    elif res_file:
        aminoacids_list = read_selecteds_from_file(res_file)
        print(
            "Searching the following amino acids: {}".format(aminoacids_list))
        selected_area_target = reference.select("resnum {}".format(
            ' '.join(aminoacids_list)))
    else:
        print(
            "Please, set an input file or a radii to determine which amino acids will be used to compute the RMSD."
        )
        # nothing was selected, so there is nothing to iterate over
        return
    unique_residues_target = sorted(set(selected_area_target.getResnums()))

    list_of_results = []
    for residue_target in unique_residues_target:
        heavy_selection = "protein and resnum {} and heavy".format(
            residue_target)
        ca_selection = "protein and resnum {} and name CA".format(
            residue_target)
        res_selected_target = target.select(heavy_selection)
        res_selected_reference = reference.select(heavy_selection)
        target_CA = target.select(ca_selection)
        reference_CA = reference.select(ca_selection)
        try:
            RMSD = prody.calcRMSD(res_selected_reference, res_selected_target)
            distance_bet_CA = prody.calcRMSD(reference_CA, target_CA)
        except Exception:
            print(
                "ERROR because different number of atoms in residue {}".format(
                    residue_target))
            for label, selection in (("TARGET", res_selected_target),
                                     ("REFERENCE", res_selected_reference)):
                # a selection that matched nothing is None
                names = None if selection is None else selection.getNames()
                print("ATOMS of the {}: {}".format(label, names))
            # skip this residue instead of recording values left over from
            # the previous one
            continue
        residue_information = (residue_target,
                               res_selected_target.getResnames()[0], RMSD,
                               distance_bet_CA)
        list_of_results.append(residue_information)
        print(residue_information)

    if write2report:
        with open(write2report, "w") as report:
            for resnum, resname, rmsd, ca_distance in list_of_results:
                report.write("{:4d}\t{}\t{:5.3f}\t{:5.3f}\t{:5.3f}\n".format(
                    resnum, resname, float(rmsd), float(ca_distance),
                    (float(rmsd) - float(ca_distance))))
示例#18
0
def calc(i, j):
    """Return the value of ``prody.calcRMSD`` for the pair (*i*, *j*)."""
    rmsd = prody.calcRMSD(i, j)
    return rmsd
示例#19
0
def get_rmsds_to_reference(ensemble):
    """
    Return the result of ``pd.calcRMSD`` called on *ensemble* alone, i.e.
    the RMSD of each structure to the reference.
    """
    rmsds = pd.calcRMSD(ensemble)
    return rmsds
示例#20
0
    def alignment_monstrosity(self,
                              rmsd_cutoff=0.5,
                              use_local_pdb_database=False,
                              verify_substructure=True):
        """
        For each fragment, align all fragment-containing ligands to the
        fragment and write PDBs with aligned coordinate systems.

        For every fragment in ``self.pdb_ligand_json`` the PDB IDs that were
        neither rejected earlier nor already processed are handled one by
        one: the ligands containing the fragment are extracted, every atom
        mapping of the fragment onto a ligand is turned into a
        transformation, and each mapping whose RMSD is below the cutoff has
        its transformed structure written to the fragment's directory under
        ``self.processed_PDBs_path``. PDB IDs that yield nothing usable are
        added to the rejected dictionary, which is pickled to
        ``self.rejected_dict_pickle`` at the end.

        :param rmsd_cutoff: fragment alignment RMSD cutoff, anything that is
            not below it gets rejected
        :param use_local_pdb_database: when False, missing structures are
            downloaded into ``self.pdb_bank_dir`` via ``prody.fetchPDBviaFTP``
            first; the flag is also forwarded to
            ``return_PDB_to_use_for_alignments``
        :param verify_substructure: forwarded to ``Align_PDB``
        :return: None
        """

        rejected_dict = self.load_previously_rejected_pdbs()

        # Create directories...
        if not use_local_pdb_database:
            os.makedirs(self.pdb_bank_dir, exist_ok=True)
        os.makedirs(self.processed_PDBs_path, exist_ok=True)

        # If use_local_pdb_database=False, use PDB FTP to download all structures
        # Otherwise, all relevant structures should be found in the local PDB database
        if not use_local_pdb_database:
            prody.pathPDBFolder(folder=self.pdb_bank_dir)

            for current_fragment in self.pdb_ligand_json:

                # Only download PDBs that aren't already in PDB bank directory
                existing_PDBs = {
                    pdb[:4].lower() for pdb in os.listdir(self.pdb_bank_dir)
                }
                PDBs_to_download = list(
                    set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                    existing_PDBs)

                if len(PDBs_to_download) > 0:
                    print(f'Downloading PDBs for {current_fragment}...\n')
                    prody.fetchPDBviaFTP(*PDBs_to_download)
                else:
                    print(
                        f'All relevant PDBs for {current_fragment} found in {self.pdb_bank_dir}!\n'
                    )

        # Fragment_1, Fragment_2, ...
        for current_fragment in self.pdb_ligand_json:

            # Create directory for processed PDBs
            processed_dir = os.path.join(self.processed_PDBs_path,
                                         current_fragment)
            processed_dir_exists = os.path.exists(processed_dir)
            os.makedirs(processed_dir, exist_ok=True)

            # Get list of already processed PDBs for current_fragment
            already_processed_pdbs = [
                file_name[:4].lower() for file_name in os.listdir(processed_dir)
            ]

            # Save ideal_ligand_containers for each fragment so things are only downloaded once
            ideal_ligand_dict = {'Ligands': dict(), 'Failed': list()}

            # Align_PDB class holds all information for the current fragment
            align = Align_PDB(self.user_defined_dir,
                              current_fragment,
                              self.sanitized_smiles_dict[current_fragment],
                              verify_substructure=verify_substructure)

            # Get PDB IDs that are viable for extracting protein-fragment contacts.
            # Earlier rejections only count when the processed directory
            # already existed. A copy is taken so the hard-coded exclusion
            # below is not written into rejected_dict and pickled on every run.
            if processed_dir_exists:
                reject_pdbs = list(rejected_dict.get(current_fragment, []))
            else:
                reject_pdbs = list()
            reject_pdbs.append('3k87')  # DEBUGGING

            viable_pdbs = list(
                set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                set(reject_pdbs) - set(already_processed_pdbs))

            # For each PDB containing a fragment-containing compound
            for pdbid in viable_pdbs:

                # Return path of PDB file to use for processing
                found_pdb, pdb_path = self.return_PDB_to_use_for_alignments(
                    pdbid, use_local_pdb_database=use_local_pdb_database)

                if not found_pdb:
                    print(f'Cannot find {pdbid}!')
                    continue

                # Proceed with processing if the current PDB passes all filters
                print("\n\nProcessing {}...".format(pdbid))

                # --- Check which ligands contain relevant fragments --- #

                relevant_ligands = self.return_substructure_containing_ligands(
                    pdb_path, self.pdb_ligand_json, current_fragment)

                # Set things up! Get ligands from Ligand Expo if haven't already tried and failed
                for ligand in relevant_ligands:

                    if not ideal_ligand_dict['Ligands'].get(
                            ligand
                    ) and ligand not in ideal_ligand_dict['Failed']:
                        ideal_ligand_container = Ideal_Ligand_PDB_Container(
                            ligand)

                        if ideal_ligand_container.success:
                            ideal_ligand_dict['Ligands'][
                                ligand] = ideal_ligand_container
                        else:
                            ideal_ligand_dict['Failed'].append(ligand)

                # Create a temp dict for ligands that will be pulled from the current PDB
                ligand_container_dict_for_current_pdb = {
                    lig: ideal_ligand_dict['Ligands'][lig]
                    for lig in ideal_ligand_dict['Ligands']
                    if lig in relevant_ligands
                }
                relevant_ligands_prody_dict = align.extract_ligand_records(
                    pdb_path, ligand_container_dict_for_current_pdb)

                # Reject if no ligands with all atoms represented can be found for the given PDB
                if len(relevant_ligands_prody_dict) < 1:
                    rejected_dict.setdefault(current_fragment,
                                             []).append(pdbid)
                    print(
                        'REJECTED - no target ligands were fully represented in the PDB'
                    )
                    continue

                # --- Perform alignment of PDB fragment substructure (mobile) onto defined fragment (target) --- #

                # Iterate over ligands found to contain fragments as substructures
                for ligand_resname, ligand_chain, ligand_resnum in relevant_ligands_prody_dict:

                    # Mapping of fragment atoms to target ligand atoms
                    target_ligand_ideal_smiles = ligand_container_dict_for_current_pdb[
                        ligand_resname].smiles

                    # todo: catch ligands with missing SMILES strings earlier...
                    if target_ligand_ideal_smiles is None:
                        continue

                    target_ligand_pdb_string = io.StringIO()
                    target_ligand_prody = relevant_ligands_prody_dict[(
                        ligand_resname, ligand_chain,
                        ligand_resnum)].select('not hydrogen')
                    prody.writePDBStream(target_ligand_pdb_string,
                                         target_ligand_prody)

                    mapping_successful, fragment_target_map = align.fragment_target_mapping(
                        target_ligand_ideal_smiles, target_ligand_pdb_string)

                    if not mapping_successful:
                        rejected_dict.setdefault(current_fragment,
                                                 []).append(pdbid)
                        print(
                            'REJECTED - failed atom mapping between target and reference fragment'
                        )
                        continue

                    # report the ligand being mapped right now, not whatever
                    # the ligand set-up loop above left behind
                    print(
                        f'\n{len(fragment_target_map)} possible mapping(s) of fragment onto {pdbid}:{ligand_resname} found...\n'
                    )

                    # Iterate over possible mappings of fragment onto current ligand
                    rmsd_success = False
                    for count, mapping in enumerate(fragment_target_map):

                        # todo: refactor to use RDKit's atom.GetMonomerInfo() for atom selections...
                        # Determine translation vector and rotation matrix
                        target_coords_and_serials, frag_atom_coords, transformation_matrix = align.determine_rotation_and_translation(
                            mapping, target_ligand_prody)
                        trgt_atom_coords, target_fragment_atom_serials = target_coords_and_serials

                        # Apply transformation to protein_ligand complex if rmsd is below cutoff
                        rmsd = prody.calcRMSD(
                            frag_atom_coords,
                            prody.applyTransformation(transformation_matrix,
                                                      trgt_atom_coords))
                        print('RMSD of target onto reference fragment:\t{}'.
                              format(rmsd))

                        if rmsd < rmsd_cutoff:
                            transformed_pdb = align.apply_transformation(
                                pdb_path, ligand_resnum,
                                target_fragment_atom_serials,
                                transformation_matrix)

                            # Nothing but the ligand is left: skip this mapping
                            if transformed_pdb.select(
                                    f'not (resname {ligand_resname})'
                            ) is None:
                                continue

                            transformed_pdb_name = f'{pdbid}_{ligand_resname}_{ligand_chain}_{ligand_resnum}-{count}.pdb'
                            prody.writePDB(
                                os.path.join(processed_dir,
                                             transformed_pdb_name),
                                transformed_pdb)
                            rmsd_success = True

                        else:
                            print(
                                'REJECTED - high RMSD upon alignment to reference fragment'
                            )

                    # no mapping of this ligand produced an output file
                    if not rmsd_success:
                        rejected_dict.setdefault(current_fragment,
                                                 []).append(pdbid)

        # Remember rejected PDBs
        with open(self.rejected_dict_pickle, 'wb') as reject_pickle:
            pickle.dump(rejected_dict, reject_pickle)
示例#21
0
import sys
import argparse
import prody
from TMalign import TMalign


if __name__ == '__main__':
    # Command-line entry point: superpose the model receptor onto the
    # reference receptor with TMalign, carry the model ligand along with that
    # transformation, and print the resulting ligand RMSD.
    p = argparse.ArgumentParser(description="L-RMS calculater")
    p.add_argument('reference_PDBfile')
    p.add_argument('model_PDBfile')
    p.add_argument('-r','--ref_receptor',default='A',help='chain name of reference receptor')
    p.add_argument('-l','--ref_ligand',default='B',help='chain name of reference ligand')
    p.add_argument('-R','--model_receptor',default='A',help='chain name of model receptor')
    p.add_argument('-L','--model_ligand',default='B',help='chain name of model ligand')
    p.add_argument('--tmalign',default='/usr/local/bin/TMalign',help='path to TMalign')
    args = p.parse_args()

    # Each chain is parsed separately so receptor and ligand can be handled
    # independently.
    ref_receptor = prody.parsePDB(args.reference_PDBfile,chain=args.ref_receptor)
    ref_ligand = prody.parsePDB(args.reference_PDBfile,chain=args.ref_ligand)
    model_receptor = prody.parsePDB(args.model_PDBfile,chain=args.model_receptor)
    model_ligand = prody.parsePDB(args.model_PDBfile,chain=args.model_ligand)

    # The receptor alignment supplies the rotation matrix and translation
    # vector; only the model ligand is actually moved.
    tmalign = TMalign(model_receptor,ref_receptor,path = args.tmalign)
    trans = prody.Transformation(tmalign.matrix,tmalign.vector)
    trans.apply(model_ligand)
    lrms = prody.calcRMSD(model_ligand,ref_ligand)

    # Parenthesised single-argument print behaves the same on Python 2 and 3
    # (the bare print statement is a SyntaxError on Python 3).
    print(lrms)


示例#22
0
 def _do_align(self):
     """Fit the prediction onto the native structure and record the RMSD.

     The transformation computed from ``self._prediction`` to
     ``self._native`` is stored on ``self._transformation`` and applied to
     ``self._prediction``. The RMSD measured afterwards is wrapped in an
     ``RMSDAlignmentResult`` and stored on ``self._align_results``.
     """
     fit = prody.calcTransformation(self._prediction, self._native)
     self._transformation = fit
     fit.apply(self._prediction)
     self._align_results = RMSDAlignmentResult(
         prody.calcRMSD(self._native, self._prediction))
def prune_pdb_models(pdb_models):
    '''
    Prune the isolated (ISO) models of a single PDB ID down to a set of
    representative models and copy those into their own directory.

    Arguments:
    pdb_models -- full list of paths to pdb model files (iso)

    Returns:
    pruned_models -- list of paths to the representative pdb models inside
                     the newly created ``representative_pdb_models``
                     directory (empty if no model file exists)

    Side effects:
    * ``delete_model`` is called for every listed path that does not exist.
    * ``<pdb_dir>/representative_pdb_models/`` is removed if present and
      recreated, where ``<pdb_dir>`` is two levels above the first
      representative model file.
    '''
    pruned_models = []

    # Determine which files actually exist; missing ones are handed to
    # delete_model (expected to delete the model's parent directory).
    iso_pdb_models = []

    for model_path in pdb_models:
        if os.path.exists(model_path):
            iso_pdb_models.append(model_path)
        else:
            # Single pre-formatted string keeps the output identical on
            # Python 2 and Python 3.
            print('{} does not exist! Deleting parent directory.'.format(
                os.path.basename(model_path)))
            delete_model(model_path)

    # Thresholds for deciding that two models describe the same segment.
    # The sequence identity cutoff is not passed, so matchAlign's own default
    # applies.
    rep_overlap_cutoff = 50  # percent sequence overlap required for a match
    rep_rmsd_cutoff = 5  # models within this RMSD (A) share one representative

    # find representative iso models
    print('Finding representative PDB ISO models...')
    rep_iso_models = []

    for iso_model in iso_pdb_models:
        # The first existing model is always a representative.
        if not rep_iso_models:
            rep_iso_models.append(iso_model)
            continue

        model = prody.parsePDB(iso_model)  # get structure
        redundant = False

        for rep_iso_model in rep_iso_models:
            rep = prody.parsePDB(rep_iso_model)  # get structure

            # matchAlign returns None when no chain pair satisfies the
            # cutoffs; otherwise items 1 and 2 are the matched atom maps.
            alignment = prody.matchAlign(model,
                                         rep,
                                         overlap=rep_overlap_cutoff)
            if alignment is not None:
                rmsd = prody.calcRMSD(alignment[1], alignment[2])
                if rmsd <= rep_rmsd_cutoff:
                    redundant = True  # this segment already has a representative
                    # take the larger structure as the representative
                    if model.numResidues() > rep.numResidues():
                        rep_iso_models.remove(rep_iso_model)
                        rep_iso_models.append(iso_model)
                    # Leaving the loop right after the list was modified is
                    # what makes mutating it during iteration safe.
                    break

        # if the iso model does not match any of our representative models,
        # then add it to the representative models list
        if not redundant:
            rep_iso_models.append(iso_model)

    print('Found {} representative ISO models: {}'.format(
        len(rep_iso_models),
        [os.path.basename(path) for path in rep_iso_models]))

    # copy representative models into their own directory
    if rep_iso_models:
        pdb_dir = os.path.abspath(
            os.path.join(rep_iso_models[0], os.pardir, os.pardir))
        # Trailing slash is kept so the returned paths are spelled exactly as
        # before.
        rep_model_dir = pdb_dir + '/representative_pdb_models/'
        if os.path.exists(rep_model_dir):
            shutil.rmtree(rep_model_dir)
        os.mkdir(rep_model_dir)

        for rep_iso_model in rep_iso_models:
            rep_iso_model_pardir = os.path.abspath(
                os.path.join(rep_iso_model, os.pardir))
            model_dir_name = os.path.basename(rep_iso_model_pardir)
            # os.path.join avoids the doubled '/' the manual concatenation
            # produced for the copy destination.
            shutil.copytree(rep_iso_model_pardir,
                            os.path.join(rep_model_dir, model_dir_name))
            # define new pathname to keep track of the models once copied
            new_iso_model_path = (rep_model_dir + model_dir_name + '/' +
                                  os.path.basename(rep_iso_model))
            pruned_models.append(new_iso_model_path)

    # return all representative pdb models
    return pruned_models
示例#24
0
def get_single_rmsd(reference, model):
    """Return the RMSD between the backbones of *model* and *reference*.

    Both inputs are reduced to the selection ``backbone or name OC2``; the
    model selection is superposed onto the reference selection with
    ``prody.superpose`` before the RMSD is measured.
    """
    selection = 'backbone or name OC2'
    ref_backbone, mod_backbone = [
        structure.select(selection) for structure in (reference, model)
    ]

    prody.superpose(mod_backbone, ref_backbone)
    backbone_rmsd = prody.calcRMSD(mod_backbone, ref_backbone)
    return backbone_rmsd
示例#25
0
def generate_fuzzball_contact_rotamersets(ligand_conformer_path,
                                          match_path,
                                          match_pose,
                                          sfxn,
                                          match_residue_map,
                                          flag_special_rot=True,
                                          custom_taskop=None,
                                          rotset_limit=200,
                                          contact_method='RMSD',
                                          RMSD_limit=1.5,
                                          apply_minimization=False,
                                          dump_rotamerset_pdb=False,
                                          report_stats=False,
                                          defined_positions=None):
    """
    Generate rotamers that recapitulate observed fuzzball contacts for each position in a nucleated match

    A clone of the match pose is transformed so that its ligand (last residue)
    lies on the fuzzball conformer, rotamers are built one position and one
    residue type at a time on a ligand-free copy, and each rotamer is kept
    only if it resembles one of the contact modes in match_residue_map and
    passes the fa_rep / ligand interaction-energy filters.

    :param ligand_conformer_path: path to ligand generated by molfile_to_params.py
    :param match_path: path to the match file; scanned for 'REMARK 666 MATCH TEMPLATE'
        lines to get motif residue numbers, and its basename names the dumped PDB
    :param match_pose: pose of the match; its last residue is taken to be the ligand
    :param sfxn: score function used for scoring, rotamer building and minimization
    :param match_residue_map: indexed as match_residue_map[position][contact_residue],
        giving the contact modes to compare rotamers against
    :param flag_special_rot: If true, flag rotamers as SPECIAL_ROT variants
    :param custom_taskop: list of task operations to apply to the PackerTask used to generate rotamers
        (NOTE(review): not referenced anywhere in this function body)
    :param rotset_limit: maximum number of accepted rotamers kept per position and
        residue type, taken in ascending order of rotamer-ligand score
    :param contact_method: 'RMSD' or 'matcher'; selects which filter rejects rotamers
    :param RMSD_limit: for contact_method 'RMSD', a rotamer is rejected when its
        smallest contact RMSD exceeds this value
    :param apply_minimization: If true, minimize the chi angles of the position
        before the rotamer is scored
    :param dump_rotamerset_pdb: If true, collect accepted rotamers in a RotamerSets
        object and dump them to a PDB (only at the extrachi/sample levels tested below)
    :param report_stats: If true, also return the per-position statistics
    :param defined_positions: optional positions; only those also present in
        match_residue_map are considered

    :return: viable_rotamers dictionary of rotamers organized by position and residue identity;
        when report_stats is true, the tuple (viable_rotamers, rotamer_stats)
    :raises Exception: if contact_method is neither 'RMSD' nor 'matcher'
    """

    sfxn_weights = sfxn.weights()
    conformer_resnum = match_pose.size(
    )  # Assumes single ligand appended to end of sequence

    if contact_method not in ['RMSD', 'matcher']:
        raise Exception(
            'Contact method needs to be one of the following: "RMSD", "matcher"'
        )

    # --- Find and store viable rotamers --- #

    # viable_rotamers[position][contact_residue] -> list of accepted rotamers
    viable_rotamers = dict()
    # rotamer_stats[position][contact_residue] -> dict of bookkeeping values
    rotamer_stats = dict()

    # Setting things up is going to mess up the match pose, so use a clone
    match_pose_clone = match_pose.clone()
    sfxn(match_pose_clone)

    # --- Transform match pose clone onto fuzzball conformer --- #
    """Required for contact coordsets to make sense"""

    # Get ligand from match, always last residue
    # todo: select chain X, ligand is always chain X
    match_pose_size = match_pose_clone.size()
    match_ligand = match_pose_clone.residue(match_pose_size)

    # Get match positions if they exist: whitespace-separated field 11 of each
    # 'REMARK 666 MATCH TEMPLATE' line is read as the motif residue number
    motif_resnums = list()
    with open(match_path, 'r') as my_match:
        for line in my_match:
            if line.startswith('REMARK 666 MATCH TEMPLATE'):
                motif_resnums.append(int(line.split()[11]))

    motif_and_ligand_resnums = motif_resnums + [conformer_resnum]

    # Keep track of match positions and compatible residue identites
    # match_residue_map = {position: dict() for position in range(1, match_pose.size())}  # Assumes one ligand appended to end of sequence

    # Import conformer from pose
    fuzzball_ligand_pose = rosetta.core.pose.Pose()
    rosetta.core.import_pose.pose_from_file(fuzzball_ligand_pose,
                                            ligand_conformer_path)
    fuzzball_ligand = fuzzball_ligand_pose.residue(1)

    # Calculate rotation/translation by hand using first three atoms of ligand
    mobile_match = rosetta.numeric.xyzTransform_double_t(
        match_ligand.xyz(1), match_ligand.xyz(2), match_ligand.xyz(3))
    mobile_match_inverse = mobile_match.inverse()
    target_fuzzball = rosetta.numeric.xyzTransform_double_t(
        fuzzball_ligand.xyz(1), fuzzball_ligand.xyz(2), fuzzball_ligand.xyz(3))

    # Compose "undo the match ligand frame" with "enter the fuzzball frame"
    ligand_rotation = target_fuzzball.R * mobile_match_inverse.R
    ligand_translation = target_fuzzball.R * mobile_match_inverse.t + target_fuzzball.t

    # Apply transformation
    match_pose_clone.apply_transform_Rx_plus_v(ligand_rotation,
                                               ligand_translation)
    # Independent copy of the transformed ligand, used for all contact geometry
    match_pose_clone_ligand = match_pose_clone.residue(match_pose_size).clone()

    # --- All other operations --- #

    # Mutate all non-motif residues within 10A from ligand to ALA, interferes with RotamerSet generation
    ligand_residue_selector = rosetta.core.select.residue_selector.ChainSelector(
        'X')
    neighborhood_selector = rosetta.core.select.residue_selector.NeighborhoodResidueSelector(
        ligand_residue_selector, 10, False)
    neighborhood_selector_bool = neighborhood_selector.apply(match_pose_clone)
    neighborhood_residues_resnums = rosetta.core.select.get_residues_from_subset(
        neighborhood_selector_bool)
    positions_to_consider = list(
        set(neighborhood_residues_resnums) - set(motif_and_ligand_resnums))

    mutate = rosetta.protocols.simple_moves.MutateResidue()
    mutate.set_res_name('ALA')

    # GLY, PRO and residues whose name contains 'disulfide' are left as they are
    for position in positions_to_consider:
        if match_pose_clone.residue(position).name3() not in [
                'GLY', 'PRO'
        ] and 'disulfide' not in match_pose_clone.residue(position).name():
            mutate.set_target(position)
            mutate.apply(match_pose_clone)

    # Build RotamerSets for each extrachi/sample level
    if dump_rotamerset_pdb:
        all_rotamersets = rosetta.core.pack.rotamer_set.RotamerSetsFactory.create_rotamer_sets(
            match_pose_clone)
        task_factory = rosetta.core.pack.task.TaskFactory()

        # NATRO positions TaskOp: prevent repacking everywhere except the
        # positions listed in match_residue_map
        rotamer_candidates_rs = rosetta.core.select.residue_selector.ResidueIndexSelector(
            ','.join([str(i) for i in match_residue_map.keys()]))
        natro_rs = rosetta.core.select.residue_selector.NotResidueSelector(
            rotamer_candidates_rs)
        natro_op = rosetta.core.pack.task.operation.OperateOnResidueSubset(
            rosetta.core.pack.task.operation.PreventRepackingRLT(), natro_rs)
        task_factory.push_back(natro_op)

        rotamersets_packer_task = task_factory.create_task_and_apply_taskoperations(
            match_pose_clone)

        all_rotamersets.set_task(rotamersets_packer_task)

    # Remove ligand from match_pose_clone before generating rotamers!!!
    match_pose_clone_apo = match_pose_clone.clone()
    match_pose_clone_apo.conformation_ptr().delete_residue_slow(
        match_pose_size)

    # Define positions where rotamers will be considered

    if defined_positions:
        rotamerset_positions = list(
            set(defined_positions) & set(match_residue_map.keys()))
    else:
        rotamerset_positions = list(match_residue_map.keys())

    print(f'Rotamerset Positions: {rotamerset_positions}')

    # Generate rotamers at each position
    for position in rotamerset_positions:

        # Prepare minimization: only the chi angles of this position may move
        if apply_minimization:
            motif_movemap = rosetta.core.kinematics.MoveMap()
            motif_movemap.set_chi(position, True)

            minimize_motif = rosetta.protocols.minimization_packing.MinMover()
            minimize_motif.movemap(motif_movemap)
            minimize_motif.score_function(sfxn)
            minimize_motif.min_type('lbfgs_armijo')
            minimize_motif.tolerance(1e-6)

        # Prepare infrastructure
        rotamer_stats[position] = dict()

        # One explicit rotamer set per position, filled with accepted rotamers
        if dump_rotamerset_pdb:
            current_rotamerset = rosetta.core.pack.rotamer_set.RotamerSetFactory.create_rotamer_set(
                match_pose_clone)

        # Keep rotamers that are compatible with minimal binding motif
        for contact_residue in match_residue_map[position]:

            # print(f'Considering position {position}: {contact_residue}')
            # Collects (rotamer_ligand_reu, rotamer) pairs for accepted rotamers
            position_rotamer_list = list()
            possible_contact_geometries = match_residue_map[position][
                contact_residue]

            # --- Prepare viable rotamers for each position --- #

            # Define packertask on the ligand-free pose
            packer_task = rosetta.core.pack.task.TaskFactory.create_packer_task(
                match_pose_clone_apo)
            packer_task.initialize_from_command_line()

            # Get boolean vector for packable positions and apply to packer task
            # NOTE(review): range(1, size()) yields size() - 1 flags, so the
            # last residue index of the apo pose gets no entry - confirm
            # whether restrict_to_residues expects one flag per residue.
            packable_positions = rosetta.utility.vector1_bool()
            packable_position_list = [
                True if i == position else False
                for i in range(1, match_pose_clone_apo.size())
            ]
            for bool_value in packable_position_list:
                packable_positions.append(bool_value)
            packer_task.restrict_to_residues(packable_positions)

            # Restrict the allowed residue types at this position to contact_residue
            restrict_CAAs = rosetta.core.pack.task.operation.RestrictAbsentCanonicalAAS(
                position, rosetta.utility.vector1_bool(20))
            restrict_CAAs.keep_aas(contact_residue)
            restrict_CAAs.apply(match_pose_clone_apo, packer_task)

            packer_neighbor_graph = rosetta.core.pack.create_packer_graph(
                match_pose_clone_apo, sfxn, packer_task)

            match_rotamer_set = rosetta.core.pack.rotamer_set.RotamerSetFactory.create_rotamer_set(
                match_pose_clone_apo)
            match_rotamer_set.set_resid(position)
            match_rotamer_set.build_rotamers(match_pose_clone_apo,
                                             sfxn,
                                             packer_task,
                                             packer_neighbor_graph,
                                             use_neighbor_context=False)

            # Skip when the only rotamer built is not of the requested type.
            # NOTE(review): if num_rotamers() were 0, rotamer(1) would still be
            # evaluated here - confirm an empty set cannot occur.
            if match_rotamer_set.num_rotamers(
            ) <= 1 and match_rotamer_set.rotamer(1).name1() != contact_residue:
                continue

            print(
                f'Position {position} ResidueType {contact_residue} - comparing {match_rotamer_set.num_rotamers()} rotamers against {len(possible_contact_geometries)} contact modes'
            )

            rotamer_stats[position][contact_residue] = dict()
            rotamer_stats[position][contact_residue][
                'num_rotamers'] = match_rotamer_set.num_rotamers()
            rotamer_info = list()
            rotamers_accepted = 0

            # --- Evaluate Rotamers --- #

            # Rotamer indices are 1-based
            for rotamer in range(1, match_rotamer_set.num_rotamers() + 1):

                # Place residue before applying to pose!!!!
                # Rotamers need to be transformed back onto the backbone of the input pdb!!!
                trail_rotamer = match_rotamer_set.rotamer(rotamer)
                trail_rotamer.place(match_pose_clone.residue(position),
                                    match_pose_clone.conformation_ptr())
                match_pose_clone.replace_residue(position, trail_rotamer,
                                                 False)
                pose_trial_rotamer = match_pose_clone.residue(position)

                # Evaluate RMSD to possible_contact_geometries
                contact_RMSDs = list()
                dof_errors = list()
                sad_atom_in_rotamer = False

                for contact_mode in possible_contact_geometries:

                    # REFERENCE: contact_info = [current_motif_coord_list, [float(a) for a in dof_tuple], constraint_atoms_dict['residue']['atom_names'], constraint_atoms_dict['ligand']['atom_names']]
                    current_motif_coord_list = contact_mode[0]
                    contact_dofs = contact_mode[1]
                    residue_matchatoms = contact_mode[2]
                    ligand_matchatoms = contact_mode[3]

                    # Skip this contact mode if the contact is mediated by a backbone atom...
                    if residue_matchatoms[0] in ['C', 'CA', 'N', 'O']:
                        continue

                    # Get contact atom coords using atom names; any failure in
                    # this block (e.g. an atom name lookup) abandons the rotamer
                    try:
                        rotamer_contact_coords = [
                            list(match_pose_clone.residue(position).xyz(atom))
                            for atom in residue_matchatoms
                        ]

                        # If distance is off (first residue atom to first
                        # ligand atom farther than 4), don't even bother...
                        residue_contactatom = pose_trial_rotamer.xyz(
                            residue_matchatoms[0])
                        ligand_contactatom = match_pose_clone_ligand.xyz(
                            ligand_matchatoms[0])
                        atom_displacement = ligand_contactatom - residue_contactatom
                        if atom_displacement.norm() > 4:
                            # print(f'Contact is {atom_displacement.norm()}A, continuing...')
                            continue

                        # Despite the names, these lists hold xyz coordinates
                        residue_atomid_list = [
                            pose_trial_rotamer.xyz(atom)
                            for atom in residue_matchatoms
                        ]
                        ligand_atomid_list = [
                            match_pose_clone_ligand.xyz(atom)
                            for atom in ligand_matchatoms
                        ]

                        # Res1 - ligand, Res2 - residue

                        # 'angle_A' is the angle Res1:Atom2 - Res1:Atom1 - Res2:Atom1
                        angle_A = rosetta.numeric.angle_degrees_double(
                            ligand_atomid_list[1], ligand_atomid_list[0],
                            residue_atomid_list[0])
                        # 'angle_B' is the angle Res1:Atom1 - Res2:Atom1 - Res2:Atom2
                        angle_B = rosetta.numeric.angle_degrees_double(
                            ligand_atomid_list[0], residue_atomid_list[0],
                            residue_atomid_list[1])
                        # 'torsion_A' is the dihedral Res1:Atom3 - Res1:Atom2 - Res1:Atom1 - Res2:Atom1
                        torsion_A = rosetta.numeric.dihedral_degrees_double(
                            ligand_atomid_list[2], ligand_atomid_list[1],
                            ligand_atomid_list[0], residue_atomid_list[0])
                        # 'torsion_AB' is the dihedral Res1:Atom2 - Res1:Atom1 - Res2:Atom1 - Res2:Atom2
                        torsion_AB = rosetta.numeric.dihedral_degrees_double(
                            ligand_atomid_list[1], ligand_atomid_list[0],
                            residue_atomid_list[0], residue_atomid_list[1])
                        # 'torsion_B' is the dihedral Res1:Atom1 - Res2:Atom1 - Res2:Atom2 - Res2:Atom3
                        torsion_B = rosetta.numeric.dihedral_degrees_double(
                            ligand_atomid_list[0], residue_atomid_list[0],
                            residue_atomid_list[1], residue_atomid_list[2])

                        rotamer_dofs = [
                            angle_A, angle_B, torsion_A, torsion_AB, torsion_B
                        ]

                    except Exception as e:
                        print(e, residue_matchatoms, ligand_matchatoms)
                        # print(f'Skipping {contact_mode[0]}: contains sad atom.')
                        sad_atom_in_rotamer = True
                        break

                    # todo: Edge condition at 0/360...
                    # The first entry of contact_dofs is skipped so the rest
                    # line up with the five measured values (presumably it is
                    # the contact distance - TODO confirm). Differences are
                    # plain absolute values with no angle wrap-around.
                    dof_difference_list = [
                        abs(ideal - measured) for ideal, measured in zip(
                            contact_dofs[1:], rotamer_dofs)
                    ]
                    # print('contact_dofs:', contact_dofs)
                    # print('rotamer_dofs:', rotamer_dofs)
                    # print('DOF DIFFERENCE LIST:', dof_difference_list)
                    dof_errors.append(max(dof_difference_list))

                    contact_RMSDs.append(
                        prody.calcRMSD(np.asarray(current_motif_coord_list),
                                       np.asarray(rotamer_contact_coords)))

                # No contact mode survived the backbone/distance pre-filters
                if len(dof_errors) == 0:
                    continue

                # An exception was raised while measuring this rotamer
                if sad_atom_in_rotamer:
                    continue

                # Continue if current rotamer does not have <{RMSD_limit}A RMSD with any contact mode
                if contact_method == 'RMSD' and min(contact_RMSDs,
                                                    default=666) > RMSD_limit:
                    rotamer_info.append((contact_RMSDs, None, None))
                    continue

                # Only continue if a contact mode exists where the max
                # angle/torsion DOF error is at most 15 degrees
                if contact_method == 'matcher' and min(dof_errors) > 15:
                    continue

                # Apply minimization to rotamer-ligand interaction before deciding to accept
                if apply_minimization:
                    minimize_motif.apply(match_pose_clone)

                # Evaluate possible clashes (fa_rep) with motif residues and ligand
                sfxn(match_pose_clone)
                edges = match_pose_clone.energies().energy_graph()

                motif_fa_rep = list()
                for motif in motif_and_ligand_resnums:
                    current_edge = edges.find_energy_edge(position, motif)
                    if current_edge is not None:
                        current_edge.fill_energy_map()
                        motif_fa_rep.append(
                            current_edge[rosetta.core.scoring.fa_rep])

                # Get score for current rotamer against ligand (0 when the
                # energy graph has no edge between them)
                current_edge = edges.find_energy_edge(position,
                                                      conformer_resnum)
                rotamer_ligand_reu = current_edge.dot(
                    sfxn_weights) if current_edge is not None else 0

                # Accept when the smallest collected fa_rep is below 20 and
                # the rotamer-ligand score is at most 20. With no edges at all
                # the default of 666 rejects the rotamer.
                # NOTE(review): min() only requires one edge to be below 20 -
                # confirm that max() was not intended for a clash check.
                if all([
                        min(motif_fa_rep, default=666) < 20,
                        rotamer_ligand_reu <= 20
                ]):

                    if flag_special_rot:

                        # Rebuild the residue type with the SPECIAL_ROT variant
                        # added, keeping the current coordinates
                        current_rsd_type_ptr = match_pose_clone.residue_type_ptr(
                            position)
                        new_rsd_type_mutable = rosetta.core.chemical.MutableResidueType(
                            current_rsd_type_ptr)
                        new_rsd_type_mutable.add_variant_type(
                            rosetta.core.chemical.SPECIAL_ROT)
                        new_rsd_type = rosetta.core.chemical.ResidueType.make(
                            new_rsd_type_mutable)
                        rosetta.core.pose.replace_pose_residue_copying_existing_coordinates(
                            match_pose_clone, position, new_rsd_type)

                    # Place residue before applying to pose!!!!
                    # Rotamers need to be transformed back onto the backbone of the input pdb!!!
                    new_rotamer = match_pose_clone.residue(position).clone()
                    new_rotamer.place(match_pose.residue(position),
                                      match_pose.conformation_ptr())

                    position_rotamer_list.append(
                        (rotamer_ligand_reu, new_rotamer))
                    rotamers_accepted += 1

                    if dump_rotamerset_pdb:
                        current_rotamerset.add_rotamer(new_rotamer)

                # Recorded for every rotamer that reached scoring, accepted or not
                rotamer_info.append(
                    (max(dof_errors), max(motif_fa_rep,
                                          default=0), rotamer_ligand_reu))

            print(
                f'{rotamers_accepted} of {match_rotamer_set.num_rotamers()} rotamers accepted'
            )
            rotamer_stats[position][contact_residue][
                'rotamer_info'] = rotamer_info
            rotamer_stats[position][contact_residue][
                'rotamers_accepted'] = rotamers_accepted

            # Keep at most rotset_limit rotamers, lowest rotamer-ligand score first
            if len(position_rotamer_list) > 0:
                position_rotamer_list_selected = sorted(
                    position_rotamer_list, key=lambda x: x[0])[:rotset_limit]
                position_rotamer_list = [
                    rot[1] for rot in position_rotamer_list_selected
                ]
                if position not in viable_rotamers.keys():
                    viable_rotamers[position] = dict()
                viable_rotamers[position][
                    contact_residue] = position_rotamer_list

        # Register this position's accepted rotamers with the RotamerSets object
        if dump_rotamerset_pdb:
            current_moltresid = all_rotamersets.resid_2_moltenres(position)
            all_rotamersets.set_explicit_rotamers(current_moltresid,
                                                  current_rotamerset)

    if dump_rotamerset_pdb:
        # Number of packing:ex1..ex4 options that are switched on
        current_extrachi = len([
            rosetta.basic.options.get_boolean_option(f'packing:ex{i}')
            for i in range(1, 5) if
            rosetta.basic.options.get_boolean_option(f'packing:ex{i}') is True
        ])
        current_sample_level = rosetta.basic.options.get_integer_option(
            f'packing:ex{current_extrachi}:level')

        # Only dump for the smaller rotamer libraries
        if current_extrachi <= 2 and current_sample_level <= 3:
            match_name = os.path.normpath(os.path.basename(match_path))

            # todo: figure out why this doesn't work... problem with CONECT records...
            # all_rotamersets.dump_pdb(match_pose_clone, f"{match_name.split('.')[0]}-extrachi_{current_extrachi}-sampling_{current_sample_level}.pdb")

            # Workaround: start from a one-residue pose and append every
            # stored rotamer to it by jump
            all_rotamers_pose = pyrosetta.pose_from_sequence('A')

            # NOTE(review): iterates every key of match_residue_map, not just
            # rotamerset_positions - confirm this is intended when
            # defined_positions is given.
            for position in match_residue_map.keys():
                position_rotset = all_rotamersets.rotamer_set_for_residue(
                    position)
                for rot in range(1, position_rotset.num_rotamers() + 1):
                    all_rotamers_pose.append_residue_by_jump(
                        position_rotset.rotamer(rot), 1)
            all_rotamers_pose.dump_pdb(
                f"{match_name.split('.')[0]}-extrachi_{current_extrachi}-sampling_{current_sample_level}.pdb"
            )

    if report_stats:
        return viable_rotamers, rotamer_stats
    else:
        return viable_rotamers
示例#26
0
def rmsd(a, b):
    """Return the RMSD of *a* against *b* after superposing *a* onto *b*.

    The fit is the transformation returned by ``pr.calcTransformation(a, b)``;
    it is applied to *a* and the result is passed to ``pr.calcRMSD`` together
    with *b*.

    NOTE(review): ``apply`` presumably moves *a* in place when *a* is a ProDy
    atomic object rather than a bare coordinate array -- confirm before
    relying on *a* being left untouched.
    """
    fitted = pr.calcTransformation(a, b).apply(a)
    return pr.calcRMSD(fitted, b)
示例#27
0
    def find_possible_ifgs_rmsd(self, comb, rmsd_threshold=1.0):
        """Return second-query selections that superpose onto a query geometry.

        Stage 1 gathers candidate iFGs.  Atoms matching
        ``segment A and chain <self.pdb_chain> sequence
        "<comb.ifg_seq_str_query>"`` are reduced to one entry per residue
        index.  When ``comb.num_res_ifg_query == 1`` every residue is a
        candidate; otherwise residues are consumed two at a time.  A
        candidate is kept only if the number of atoms selected equals the
        number of atom names requested from ``comb.ifg_sele_dict_query`` and
        all of its residue numbers are positive.  The number of candidates
        kept is added to ``comb.total_possible_ifgs`` whenever the sequence
        selection matched anything.  No occupancy check is made here.

        Stage 2 looks, for each candidate, at every residue that has
        ``comb.query_names[1]`` atoms within ``comb.query_distance`` of the
        center of the candidate's ``comb.query_names[0]`` atoms.  If the
        residue carries all of the ``comb.query_names[1]`` atoms, the
        coordinates of both name sets are stacked and each entry of
        ``comb.query_coords`` is superposed onto them via ``get_rot_trans``.
        The residue's selection is recorded the first time the RMSD is at
        most *rmsd_threshold*.

        Args:
            comb: query description; the attributes named above are read and
                ``total_possible_ifgs`` is updated.
            rmsd_threshold: largest RMSD that still counts as a match.

        Returns:
            List of selections (the ``comb.query_names[1]`` atoms of each
            matching residue), in the order they were found.
        """
        possible_ifgs = []
        single_residue = comb.num_res_ifg_query == 1
        sequence_query = ('segment A and chain ' + self.pdb_chain +
                          ' sequence "' + comb.ifg_seq_str_query + '"')
        sequence_hits = self.prody_pdb.select(sequence_query)

        if sequence_hits is not None:
            # One entry per residue; first_atoms indexes the first atom of
            # each residue so its residue name can be looked up.
            resindices, first_atoms = np.unique(
                sequence_hits.getResindices(), return_index=True)
            resnames = sequence_hits.getResnames()[first_atoms]

            if single_residue:
                for resindex, resname in zip(resindices, resnames):
                    atom_names = comb.ifg_sele_dict_query[1][resname]
                    candidate = self.prody_pdb.select(
                        'resindex ' + str(resindex) + ' and name ' +
                        atom_names)
                    if candidate is None:
                        continue
                    # Reject residues missing any of the requested atoms.
                    if len(candidate) != len(atom_names.split()):
                        continue
                    if all(candidate.getResnums() > 0):
                        possible_ifgs.append(candidate)
            else:
                # Walk the residues two at a time: (0, 1), (2, 3), ...
                for start in range(0, len(resindices), 2):
                    resind1, resind2 = resindices[start:start + 2]
                    resname1, resname2 = resnames[start:start + 2]
                    try:
                        candidate = self.prody_pdb.select(
                            '(resindex ' + str(resind1) + ' and name ' +
                            comb.ifg_sele_dict_query[1][resname1] + ')' +
                            ' or (resindex ' + str(resind2) + ' and name ' +
                            comb.ifg_sele_dict_query[2][resname2] + ')')
                    except KeyError:
                        print('Non-canonical residue in iFG, skipping.')
                        continue
                    if candidate is None:
                        continue
                    expected_atoms = (
                        len(comb.ifg_sele_dict_query[1][resname1].split()) +
                        len(comb.ifg_sele_dict_query[2][resname2].split()))
                    if (len(candidate) == expected_atoms
                            and all(candidate.getResnums() > 0)):
                        possible_ifgs.append(candidate)

            comb.total_possible_ifgs += len(possible_ifgs)

        passed_possible_ifgs = []
        for pifg in possible_ifgs:
            first_names = comb.query_names[0]
            center = pr.calcCenter(
                pifg.select('name ' + ' '.join(first_names)))

            second_names = comb.query_names[1]
            second_name_sel = 'name ' + ' '.join(second_names)
            nearby = self.prody_pdb.select(
                second_name_sel + ' within ' + str(comb.query_distance) +
                ' of center',
                center=center)
            if nearby is None:
                continue

            nearby_resindices = np.unique(nearby.getResindices())
            first_coords = [
                pifg.select('name ' + name).getCoords()[0]
                for name in first_names
            ]
            for resind in nearby_resindices:
                q_sel = self.prody_pdb.select(
                    second_name_sel + ' and resindex ' + str(resind))
                # Skip residues that do not carry every second-query atom.
                if len(q_sel) != len(second_names):
                    continue
                second_coords = [
                    q_sel.select('name ' + name).getCoords()[0]
                    for name in second_names
                ]
                target_coords = np.vstack((first_coords, second_coords))
                for coords in comb.query_coords:
                    R, m_com, t_com = get_rot_trans(coords, target_coords)
                    superposed = np.dot(coords - m_com, R) + t_com
                    if pr.calcRMSD(superposed,
                                   target_coords) <= rmsd_threshold:
                        # Only the second-query selection is recorded as the
                        # iFG; one matching query geometry is enough.
                        passed_possible_ifgs.append(q_sel)
                        break
        return passed_possible_ifgs