def matchAlign(mobile, target, **kwargs):
    """Superpose *mobile* onto *target* based on best matching pair of chains.

    .. versionadded:: 0.7.1

    This function makes use of :func:`matchChains` for matching chains.

    This function returns a tuple that contains the following items:

      * *mobile* after it is superposed,
      * Matching chain from *mobile* as a :class:`~prody.atomic.AtomMap` instance,
      * Matching chain from *target* as a :class:`~prody.atomic.AtomMap` instance,
      * Percent sequence identity of the match,
      * Percent sequence overlap of the match.
    """

    match = matchChains(mobile, target, **kwargs)
    if not match:
        return
    match = match[0]
    LOGGER.info('RMSD before alignment (A): {0:.2f}'
                .format(prody.calcRMSD(match[0], match[1])))
    prody.calcTransformation(match[0], match[1]).apply(mobile)
    LOGGER.info('RMSD after alignment (A): {0:.2f}'
                .format(prody.calcRMSD(match[0], match[1])))
    return (mobile,) + match
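A minimal, hedged usage sketch for the function above; the file names are placeholders, and an equivalent matchAlign is also available directly from ProDy.

import prody

# Hypothetical input files; any two structures with matching chains work.
mobile = prody.parsePDB('model.pdb')
target = prody.parsePDB('reference.pdb')

result = matchAlign(mobile, target)
if result is not None:
    aligned, mobile_map, target_map, seqid, overlap = result
    print('Sequence identity: %.1f%%, overlap: %.1f%%' % (seqid, overlap))
    print('Final RMSD: %.2f A' % prody.calcRMSD(mobile_map, target_map))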
def clusterize(sorted_ids):
    """Clusters the structures identified by the IDs inside the sorted_ids list"""
    clusters_found = 0
    clusters = {clusters_found: [sorted_ids[0]]}
    # Read all structures backbone atoms
    backbone_atoms = get_backbone_atoms(sorted_ids)
    for j in sorted_ids[1:]:
        log.info("Glowworm %d with pdb lightdock_%d.pdb" % (j, j))
        in_cluster = False
        for cluster_id in list(clusters.keys()):
            # For each cluster representative
            representative_id = clusters[cluster_id][0]
            rmsd = calcRMSD(backbone_atoms[representative_id], backbone_atoms[j]).round(4)
            log.info('RMSD between %d and %d is %5.3f' % (representative_id, j, rmsd))
            if rmsd <= 4.0:
                clusters[cluster_id].append(j)
                log.info("Glowworm %d goes into cluster %d" % (j, cluster_id))
                in_cluster = True
                break
        if not in_cluster:
            clusters_found += 1
            clusters[clusters_found] = [j]
            log.info("New cluster %d" % clusters_found)
    return clusters
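A hedged, self-contained sketch of the same greedy clustering idea using plain ProDy calls; the lightdock_<id>.pdb naming and the assumption that every file contains identical backbone atoms are carried over from the function above.

import prody

def cluster_by_rmsd(structure_ids, cutoff=4.0):
    # Load backbone atoms of every structure once (all files assumed identical in atom content).
    backbone = {i: prody.parsePDB('lightdock_%d.pdb' % i).select('backbone')
                for i in structure_ids}
    clusters = {0: [structure_ids[0]]}
    for j in structure_ids[1:]:
        for cid, members in clusters.items():
            # Compare only against the cluster representative (its first member).
            if prody.calcRMSD(backbone[members[0]], backbone[j]) <= cutoff:
                members.append(j)
                break
        else:
            # No representative was close enough: open a new cluster.
            clusters[len(clusters)] = [j]
    return clusters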
def calc_rmsd_matrix_intra(self, align=False, sel='all'):
    ag = self.ag.copy()
    rmsd = []
    for i in range(ag.numCoordsets()):
        ag.setACSIndex(i)
        if align:
            prody.alignCoordsets(ag.select(sel))
        rmsd.append([prody.calcRMSD(ag.select(sel))])
    rmsd = np.concatenate(rmsd)
    return rmsd
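A hedged sketch of the same pattern as a standalone helper: an all-against-all RMSD matrix over the models of a multi-model PDB file. The file name and the CA selection are placeholders.

import numpy as np
import prody

def intra_rmsd_matrix(pdb_path, sel='name CA', align=True):
    ag = prody.parsePDB(pdb_path)
    rows = []
    for i in range(ag.numCoordsets()):
        ag.setACSIndex(i)                     # make model i the reference frame
        if align:
            prody.alignCoordsets(ag.select(sel))
        rows.append(prody.calcRMSD(ag.select(sel)))   # RMSD of every model to model i
    return np.vstack(rows)

matrix = intra_rmsd_matrix('models.pdb')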
def compare_pdb_files(file1, file2):
    """Returns the RMSD between two PDB files of the same protein.

    Args:
        file1 (str): Path to first PDB file.
        file2 (str): Path to second PDB file. Must be the same protein as in file1.

    Returns:
        float: Root Mean Squared Deviation (RMSD) between the two structures.
    """
    s1 = pr.parsePDB(file1)
    s2 = pr.parsePDB(file2)
    transformation = pr.calcTransformation(s1, s2)
    s1_aligned = transformation.apply(s1)
    return pr.calcRMSD(s1_aligned, s2)
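A hedged usage note: calcTransformation and calcRMSD require both structures to contain the same atoms in the same order, so restricting the comparison to CA atoms is a common safeguard when files differ in hydrogens or alternate locations. Paths below are placeholders.

import prody as pr

s1 = pr.parsePDB('model_round1.pdb').select('name CA')
s2 = pr.parsePDB('model_round2.pdb').select('name CA')
# Superpose s1 onto s2, then measure the remaining deviation.
print('CA RMSD: %.2f A' % pr.calcRMSD(pr.calcTransformation(s1, s2).apply(s1), s2))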
def prody_align(opt):
    """Align models in a PDB file or a PDB file onto others."""

    import prody
    LOGGER = prody.LOGGER

    args = opt.pdb
    if len(args) == 1:
        pdb = args[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        selstr, prefix, model = opt.select, opt.prefix, opt.model
        pdb = prody.parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warning('Selection "{0:s}" does not match any atoms.'
                           .format(selstr))
            sys.exit(-1)
        LOGGER.info('{0:d} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        pdb.setACSIndex(model - 1)
        prody.alignCoordsets(pdb, selstr=selstr)
        rmsd = prody.calcRMSD(pdb)
        LOGGER.info('Max RMSD: {0:0.2f} Mean RMSD: {1:0.2f}'
                    .format(rmsd.max(), rmsd.mean()))
        if prefix == '':
            prefix = pdb.getTitle() + '_aligned'
        outfn = prefix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        prody.writePDB(outfn, pdb)
    else:
        reffn = args.pop(0)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = prody.parsePDB(reffn)
        for arg in args:
            if arg == reffn:
                continue
            if '_aligned.pdb' in arg:
                continue
            pdb = prody.parsePDB(arg)
            if prody.matchAlign(pdb, ref):
                outfn = pdb.getTitle() + '_aligned.pdb'
                LOGGER.info('Writing file: ' + outfn)
                prody.writePDB(outfn, pdb)
            else:
                LOGGER.warning('Failed to align ' + arg)
def calc_rmsd_with(self, mol, align=False, sel='all'):
    ag1 = self.ag.copy()
    ag2 = mol.ag.copy()
    # Check the selections before copying, so an empty selection raises the
    # intended RuntimeError instead of an AttributeError on None.
    sel1 = ag1.select(sel)
    sel2 = ag2.select(sel)
    if sel1 is None or sel2 is None:
        raise RuntimeError('Selection is empty')
    sel1 = sel1.copy()
    sel2 = sel2.copy()
    if sel1.numAtoms() != sel2.numAtoms():
        raise RuntimeError('Selections are different')

    # Stack the coordinate sets of both molecules onto one atom group so that
    # alignCoordsets/calcRMSD can compare across molecules in a single pass.
    merged = np.concatenate([sel1.getCoordsets(), sel2.getCoordsets()])
    n1, n2 = sel1.numCoordsets(), sel2.numCoordsets()
    sel1.setCoords(merged)

    rmsd = []
    for i in range(n1):
        sel1.setACSIndex(i)
        if align:
            prody.alignCoordsets(sel1)
        rmsd.append([prody.calcRMSD(sel1)[n1:]])
    rmsd = np.concatenate(rmsd)
    return rmsd
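A hedged sketch of the same cross-RMSD trick without the wrapper class, using two already-parsed ProDy atom groups; the selection string and the equal-atom-count assumption match the method above.

import numpy as np
import prody

def cross_rmsd(ag1, ag2, sel='name CA', align=True):
    sel1 = ag1.select(sel).copy()
    sel2 = ag2.select(sel).copy()
    n1 = sel1.numCoordsets()
    # Stack both sets of frames onto one atom group.
    sel1.setCoords(np.concatenate([sel1.getCoordsets(), sel2.getCoordsets()]))
    rows = []
    for i in range(n1):
        sel1.setACSIndex(i)
        if align:
            prody.alignCoordsets(sel1)
        rows.append(prody.calcRMSD(sel1)[n1:])   # RMSDs of frame i of ag1 to every frame of ag2
    return np.vstack(rows)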
def corepagecalculation(pdbfilename, selatom, noma1, nummodes, gamcut, cut1, gam2, cut2, showresults, smodes, snmd, smodel, scollec, massnomass, sample1, modeens, confens, rmsdens, traverse1, modetra, steptra, rmsdtra, modelnumber, caanm, cagnm, nohanm, nohgnm, allanm, allgnm, bbanm, bbgnm, scanm, scgnm, nmdfolder, modesfolder, collectivityfolder, modelnewname, nmdnewname, modesnewname, modesendname, collectivitynewname, collectivityendname, samplenewname, traversenewname, crosscorr=0, corrfolder='', corrname='', corrend='', compmode01='7', compmode02='15', sqflucts=0, sqfluctsfolder='', sqfluctsname='', sqfluctsend='', separatevar1='0', temfac=0, temfacfolder='', temfacname='', temfacend='', fracovar=0, fraconame='', fracoend='', ovlap=0, ovlapfold='', ovlapname='', ovlapend='', ovlaptab=0, ovlaptabname='', ovlaptabend='', comppdbfilename=''): # modelnumber import prody import time import os import Tkinter root=Tkinter.Tk() root.title('Info') onlypage=Tkinter.Frame(root) onlypage.pack(side='top') Tkinter.Label(onlypage,text='File: '+pdbfilename).grid(row=0,column=0,sticky='w') Tkinter.Label(onlypage,text='Atoms: '+selatom).grid(row=1,column=0,sticky='w') Tkinter.Label(onlypage,text='Analysis: '+noma1).grid(row=2,column=0,sticky='w') path=os.path.join(os.path.expanduser('~'),'.noma/') fin = open(path+'savefile.txt','r') global savedfile savedfile=fin.readlines() fin.close() i=0 a=len(savedfile) while i<a: savedfile[i]=savedfile[i][:-1] i+=1 if gamcut=='0': Tkinter.Label(onlypage,text='Gamma: r^'+savedfile[91]).grid(row=3,column=0,sticky='w') Tkinter.Label(onlypage,text='Cutoff: '+cut1).grid(row=4,column=0,sticky='w') elif gamcut=='1': Tkinter.Label(onlypage,text='Gamma: '+gam2).grid(row=3,column=0,sticky='w') Tkinter.Label(onlypage,text='Cutoff: '+cut2).grid(row=4,column=0,sticky='w') find = 0 # while find < len(pdbfilename): # if pdbfilename[-(find+1):-find] == '/': # bgn = len(pdbfilename)-find # break # else: # helps in the find +=1 # saving of files try: # float(bgn) # except (NameError): # bgn = 0 # find = 0 # while bgn+find<len(pdbfilename): # if pdbfilename[bgn+find:bgn+find+1] == '.': # end = len(pdbfilename)-(bgn+find) # break # else: # find +=1 # try: # name = pdbfilename[bgn:-end] # except (NameError): # name = pdbfilename[bgn:len(pdbfilename)] # name of the file bgn = pdbfilename[:bgn] # path for file mytimeis = time.asctime(time.localtime(time.time())) start = time.time() try: p38 = prody.parsePDB(pdbfilename,model=int(modelnumber)) except: import tkMessageBox tkMessageBox.askokcancel("File Error","""This is not the correct path or name. 
Try entering /some/path/nameoffile.pdb If you need help finding the path, open a new terminal and enter: find -name 'filename.pdb' use the output as the pdb input If this doesn't work, make sure the file is in PDB format.""") p38 = prody.parsePDB(pdbfilename) print 'Submitted: '+pdbfilename+' at '+mytimeis Tkinter.Label(onlypage,text='Submitted at: '+mytimeis).grid(row=5,column=0,sticky='w') root.update() if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode": folder = cagnm+'/' pro = p38.select('protein and name CA') # selects only carbon alpahs elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode": folder = caanm+'/' pro = p38.select('protein and name CA') elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode": folder = nohgnm+'/' pro = p38.select('protein and not name "[1-9]?H.*"') # gets rid of all Hydrogens elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode": folder = nohanm+'/' pro = p38.select('protein and not name "[1-9]?H.*"') elif selatom == "All" and noma1 == "Gaussian Normal Mode": folder = allgnm+'/' pro = p38.select('protein') elif selatom == "All" and noma1 == "Anisotropic Normal Mode": folder = allanm+'/' pro = p38.select('protein') elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode": folder = bbgnm+'/' pro = p38.select('protein and name CA C O N H') # selects backbone elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode": folder = bbanm+'/' pro = p38.select('protein and name CA C O N H') # selects backbone elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode": folder = scgnm+'/' pro = p38.select('protein and not name CA C O N H') # selects sidechain elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode": folder = scanm+'/' pro = p38.select('protein and not name CA C O N H') # selects sidechain try: # open(bgn+folder) # creates the folders except (IOError): # where the files will try: # be saved only if they os.makedirs(bgn+folder) # are not there except (OSError): # mer = 0 # if noma1 == "Gaussian Normal Mode": print 'Building the Kirchhoff matrix' Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=6,column=0,sticky='w') root.update() anm = prody.GNM(name)### if gamcut=='0': anm.buildKirchhoff(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)### anm.setKirchhoff(anm.getKirchhoff()) elif gamcut=='1': anm.buildKirchhoff(pro,cutoff=float(cut2),gamma=float(gam2))### brat = 2 elif noma1 == "Anisotropic Normal Mode": print 'Building the Hessian matrix' Tkinter.Label(onlypage,text='Building Hessian').grid(row=6,column=0,sticky='w') root.update() anm = prody.ANM(name)### if gamcut=='0': anm.buildHessian(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)### anm.setHessian(anm.getHessian())### elif gamcut=='1': anm.buildHessian(pro,cutoff=float(cut2),gamma=float(gam2))### brat = 7 print 'Calculating modes' Tkinter.Label(onlypage,text='Calculating modes').grid(row=7,column=0,sticky='w') root.update() anm.calcModes(int(nummodes),zeros = True)### numatom=anm.numAtoms()### eigval=anm.getEigvals()### atomname=pro.getNames()### if smodel==1: if brat==2: modelfilename=bgn+folder+name+modelnewname+'.gnm.npz' elif brat==7: modelfilename=bgn+folder+name+modelnewname+'.anm.npz' print 'Saving Model' Tkinter.Label(onlypage,text='Saving Model').grid(row=8,column=0,sticky='w') root.update() try: prody.saveModel(anm,bgn+folder+name+modelnewname,True)### except: print 'Matrix not saved due to size' Tkinter.Label(onlypage,text='Matrix not saved').grid(row=8,column=0,sticky='w') root.update() 
prody.saveModel(anm,bgn+folder+name+modelnewname)### if snmd==1: print 'Saving NMD' Tkinter.Label(onlypage,text='Saving NMD').grid(row=9,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+nmdfolder+'/') # except (OSError): # mer = 0 # prody.writeNMD(bgn+folder+nmdfolder+'/'+name+nmdnewname+'.nmd',anm[:len(eigval)],pro)### # this can be viewed in VMD if smodes==1: print 'Saving Modes' Tkinter.Label(onlypage,text='Saving Modes').grid(row=10,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+modesfolder+'/') # except (OSError): # mer = 0 # modefile = bgn+folder+modesfolder+'/'+name+modesnewname+'.'+modesendname fout = open(modefile,'w') mer = 0 while mer< len(eigval): slowest_mode = anm[mer]### r = slowest_mode.getEigvec()### p = slowest_mode.getEigval()### tq = 0 tt = 0 ttt = 1 tttt = 2 fout.write('MODE {0:3d} {1:15e}'.format(mer+1,p)) fout.write(""" ------------------------------------------------- """) if noma1 == "Gaussian Normal Mode": while tq < numatom: fout.write("""{0:4s}{1:15e} """.format(atomname[tq],r[tq])) tq +=1 elif noma1 == "Anisotropic Normal Mode": while tt < numatom*3: fout.write("""{0:4s}{1:15e}{2:15e}{3:15e} """.format(atomname[tq],r[tt],r[ttt],r[tttt])) tq+=1 tt +=3 ttt+=3 tttt+=3 mer +=1 fout.close() if showresults=='1': os.system('/usr/bin/gnome-open '+modefile) if scollec==1: print 'Saving collectivity' Tkinter.Label(onlypage,text='Saving collectivity').grid(row=11,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+collectivityfolder+'/') # except (OSError): # mer = 0 # mer = 0 xx = [0]*(numatom) # sets the array to zero and other initial conditions i = 0 aa = 0 no = 0 var3 = 0 sss = [0]*(len(eigval)) while mer< len(eigval): slowest_mode = anm[mer]### r = slowest_mode.getEigvec()### p = slowest_mode.getEigval()### a = 0 tt = 0 ttt = 1 tttt = 2 while a < numatom: atom = atomname[a] mass = 0 while mass < 2: if atom[mass] == "N": # all nitrogen m = 14.0067 break elif atom[mass] == 'H': # all hydrogen m = 1.00794 break elif atom[mass] == "C" : # all carbon m = 12.0107 break elif atom[mass] == "O" : # all oxygen m = 15.9994 break elif atom[mass] == 'S': # all sulfur m = 32.065 break elif atom[mass] == 'P' : # all phosphorus m = 30.973762 break else: if mass == 0: mass +=1 try: atom[mass] except (IndexError): m = 1 if no == 0: print 'Enter atom '+atom+' in to the system. Its mass was set to 1 in this simulation.' no +=1 break else: m = 1 if no == 0: print 'Enter atom '+atom+' in to the system. 
Its mass was set to 1 in this simulation' no +=1 break if len(r)/numatom == 3: xx[i] = (r[tt]**2 + r[ttt]**2 + r[tttt]**2)/m i +=1 tt +=3 ttt+=3 tttt+=3 else: xx[i] = (r[tt]**2)/m i +=1 tt +=1 a +=1 var3 = 0 j = 0 loop = 1 while loop == 1: if sum(xx) == 0: # need this because you can't divide by 0 loop = 0 elif j <(numatom): var1 = xx[j]/sum(xx) if var1 == 0: var2 = 0 elif var1 != 0: from math import log # this means natural log var2 = var1* log(var1) var3 += var2 j +=1 else: from math import exp k = exp(-var3)/numatom sss[aa] = k, aa+1 aa +=1 mer +=1 loop = 0 i = 0 xx = [0]*(numatom) # goes through all this until the big loop is done a = 0 k=[0]*(len(eigval)) while a < len(eigval): k[a]=prody.calcCollectivity(anm[a]),a+1 a +=1 collectivefile = bgn+folder+collectivityfolder+'/'+name+collectivitynewname+'.'+collectivityendname fout = open(collectivefile,'w') if massnomass=='0': fout.write('MODE COLLECTIVITY(mass)') fout.write(""" --------------------------- """) for h in sorted(sss,reverse=True): fout.write(str(h)[-3:-1]+' '+str(h)[1:19]+""" """) fout.write(""" MODE COLLECTIVITY(without mass)""") fout.write(""" --------------------------- """) for hh in sorted(k,reverse=True): fout.write(str(hh)[-3:-1]+' '+str(hh)[1:19]+""" """) elif massnomass=='1': fout.write('MODE COLLECTIVITY(without mass)') fout.write(""" --------------------------- """) for hh in sorted(k,reverse=True): fout.write(str(hh)[-3:-1]+' '+str(hh)[1:19]+""" """) fout.write(""" MODE COLLECTIVITY(mass)""") fout.write(""" --------------------------- """) for h in sorted(sss,reverse=True): fout.write(str(h)[-3:-1]+' '+str(h)[1:19]+""" """) fout.close() if showresults=='1': os.system('/usr/bin/gnome-open '+collectivefile) fin = open(collectivefile,'r') lst = fin.readlines() hi0 = 2 looop = 1 prut=0 secoll=0 thicoll=0 while looop == 1: fine = lst[hi0] if int(fine[0:2]) >= brat: if prut==0: prut=fine[0:2] elif secoll==0: secoll=fine[0:2] elif thicoll==0: thicoll=fine[0:2] else: foucoll=fine[0:2] looop = 0 else: hi0 +=1 mostcollective= "Mode "+prut+" is the most collective." 
Tkinter.Label(onlypage,text='Mode '+prut+' is the most collective').grid(row=12,column=0,sticky='w') root.update() print mostcollective fin.close() if sample1 == 1: print 'Saving sample file' Tkinter.Label(onlypage,text='Saving sample file').grid(row=13,column=0,sticky='w') root.update() a = modeens+' ' b = [0]*(len(a)+1) i = 0 j = 0 b1 = 0 while i < len(a): if a[i:i+1] ==' ' or a[i:i+1]==',': try: b[b1]=int(a[j:i])-1 except: if '1c' in a[j:i]: b[b1]=int(prut)-1 elif '2c' in a[j:i]: b[b1]=int(prut)-1 b1 +=1 b[b1]=int(secoll)-1 elif '3c' in a[j:i]: b[b1]=int(prut)-1 b1 +=1 b[b1]=int(secoll)-1 b1 +=1 b[b1]=int(thicoll)-1 elif '4c' in a[j:i]: b[b1]=int(prut)-1 b1 +=1 b[b1]=int(secoll)-1 b1 +=1 b[b1]=int(thicoll)-1 b1+=1 b[b1]=int(foucoll)-1 j = i+1 i +=1 b1 +=1 else: i +=1 del b[b1:] ensemble = prody.sampleModes(anm[b],pro, n_confs=int(confens), rmsd =float(rmsdens)) p38ens=pro.copy() p38ens.delCoordset(0) p38ens.addCoordset(ensemble.getCoordsets()) prody.writePDB(bgn+folder+name+samplenewname+'.pdb',p38ens) if traverse1 ==1: print 'Saving traverse file' Tkinter.Label(onlypage,text='Saving traverse file').grid(row=14,column=0,sticky='w') root.update() if modetra=='c': modefortra=int(prut)-1 else: modefortra=int(modetra)-1 trajectory=prody.traverseMode(anm[modefortra],pro,n_steps=int(steptra),rmsd=float(rmsdtra)) prody.calcRMSD(trajectory).round(2) p38traj=pro.copy() p38traj.delCoordset(0) p38traj.addCoordset(trajectory.getCoordsets()) prody.writePDB(bgn+folder+name+'_mode'+str(modefortra+1)+traversenewname+'.pdb',p38traj) if crosscorr==1: print 'Saving cross correlation' Tkinter.Label(onlypage,text='Saving cross-correlation').grid(row=15,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+corrfolder+'/') # except (OSError): # mer = 0 i=int(compmode01) while i <= int(compmode02): x=i-1 correlationdataname=bgn+folder+corrfolder+'/'+name+corrname+'_mode'+str(x+1)+'.'+corrend prody.writeArray(correlationdataname,prody.calcCrossCorr(anm[x]),'%.18e') print correlationdataname i+=1 ## if sqflucts==1: print 'Saving square fluctuation' Tkinter.Label(onlypage,text='Saving square fluctuation').grid(row=16,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+sqfluctsfolder+'/') # except (OSError): # mer = 0 i=int(compmode01) while i < int(compmode02): yelp = i-1 sqfluctdataname = bgn+folder+sqfluctsfolder+'/'+name+sqfluctsname+'_mode'+str(yelp+1)+'.'+sqfluctsend fout = open(sqfluctdataname,'w') if separatevar1=='0': a = 0 while a < numatom: fout.write(str(a)) fout.write(""" """) fout.write(str(prody.calcSqFlucts(anm[yelp])[a])) fout.write(""" """) a +=1 elif separatevar1=='1': a=0 while a <numatom: firstresnum=int(p38.getResnums()[0:1][0]) origiresnum=int(p38.getResnums()[0:1][0]) while firstresnum<(int(numatom*1.0/p38.numChains())+origiresnum): fout.write(str(firstresnum)) fout.write('\t') fout.write(str(prody.calcSqFlucts(anm[yelp])[a])) fout.write('\n') a+=1 firstresnum+=1 fout.write('&\n') fout.close() print sqfluctdataname i+=1 if temfac==1: print 'Saving temperature factors' Tkinter.Label(onlypage,text='Saving temperature factors').grid(row=17,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+temfacfolder+'/') # except (OSError): # mer = 0 fin=open(pdbfilename,'r') d = [None]*len(atomname) e = 0 for line in fin: pair = line.split() if 'ATOM ' in line and e < len(atomname): if str(pair[2]) == str(atomname[e]): d[e]=str(pair[1]) e+=1 else: e+=0 else: continue fin.close() sqf = prody.calcSqFlucts(anm) x = sqf/((sqf**2).sum()**.5) y = 
prody.calcTempFactors(anm,pro) a = 0 tempfactorsdataname =bgn+folder+temfacfolder+'/'+name+temfacname+'.'+temfacend fout=open(tempfactorsdataname,'w') fout.write("""Atom Residue TempFactor TempFactor with exp beta """) while a < numatom: fout.write("""{0:4s} {1:4d} {2:15f} {3:15f} """.format(d[a],a+1,x[a],y[a])) a +=1 fout.close() print tempfactorsdataname if fracovar==1: try: import matplotlib.pyplot as plt print 'Saving Fraction of Variance' Tkinter.Label(onlypage,text='Saving Fraction of Variance').grid(row=18,column=0,sticky='w') root.update() try: # os.makedirs(bgn+folder+modesfolder+'/') # except (OSError): # mer = 0 # plt.figure(figsize = (5,4)) prody.showFractVars(anm) prody.showCumulFractVars(anm) fracvardataname =bgn+folder+modesfolder+'/'+name+fraconame+'.'+fracoend plt.savefig(fracvardataname) print fracvardataname if showresults=='1': os.system('/usr/bin/gnome-open '+fracvardataname) except: print 'Error: Fraction of Variance' Tkinter.Label(onlypage,text='Error: Fraction of Variance').grid(row=18,column=0,sticky='w') root.update() mer=0 if ovlap==1 or ovlaptab==1: try: import matplotlib.pyplot as plt print 'Saving Overlap' Tkinter.Label(onlypage,text='Saving Overlap').grid(row=19,column=0,sticky='w') root.update() Tkinter.Label(onlypage,text='Comparison: '+comppdbfilename).grid(row=20,column=0,sticky='w') ## find = 0 while find < len(comppdbfilename): if comppdbfilename[-(find+1):-find] == '/': bgn1 = len(comppdbfilename)-find break else: find +=1 try: float(bgn1) except (NameError): bgn1 = 0 find = 0 while bgn1+find<len(comppdbfilename): if comppdbfilename[bgn1+find:bgn1+find+1] == '.': end1 = len(comppdbfilename)-(bgn1+find) break else: find +=1 try: name1 = comppdbfilename[bgn1:-end1] except (NameError): name1 = comppdbfilename[bgn1:len(comppdbfilename)] bgn1 = comppdbfilename[:bgn1] p381 = prody.parsePDB(comppdbfilename,model=int(modelnumber)) if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode": pro1 = p381.select('protein and name CA') elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode": pro1 = p381.select('protein and name CA') elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode": pro1 = p381.select('protein and not name "[1-9]?H.*"') elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode": pro1 = p381.select('protein and not name "[1-9]?H.*"') elif selatom == "All" and noma1 == "Gaussian Normal Mode": pro1 = p381.select('protein') elif selatom == "All" and noma1 == "Anisotropic Normal Mode": pro1 = p381.select('protein') elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode": pro1 = p381.select('protein and name CA C O N H') elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode": pro1 = p381.select('protein and name CA C O N H') elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode": pro1 = p381.select('protein and not name CA C O N H') elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode": pro1 = p381.select('protein and not name CA C O N H') if noma1 == "Gaussian Normal Mode": print 'Building the Kirchhoff matrix' Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=21,column=0,sticky='w') root.update() anm1 = prody.GNM(name1) if gamcut=='0': anm1.buildKirchhoff(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent) anm1.setKirchhoff(anm1.getKirchhoff()) elif gamcut=='1': anm1.buildKirchhoff(pro1,cutoff=float(cut2),gamma=float(gam2)) brat = 2 elif noma1 == "Anisotropic Normal Mode": print 'Building the Hessian matrix' Tkinter.Label(onlypage,text='Building 
Hessian').grid(row=21,column=0,sticky='w') root.update() anm1 = prody.ANM(name1) if gamcut=='0': anm1.buildHessian(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent) anm1.setHessian(anm1.getHessian()) elif gamcut=='1': anm1.buildHessian(pro1,cutoff=float(cut2),gamma=float(gam2)) brat = 7 print 'Calculating modes' Tkinter.Label(onlypage,text='Calculating modes').grid(row=22,column=0,sticky='w') root.update() anm1.calcModes(int(nummodes),zeros = True) ## try: os.makedirs(bgn+folder+ovlapfold+'/') except (OSError): mer = 0 if ovlap==1: i=int(compmode01) while i < int(compmode02): a = i-1 plt.figure(figsize=(5,4)) prody.showCumulOverlap(anm[a],anm1) prody.showOverlap(anm[a],anm1) plt.title('Overlap with Mode '+str(a+1)+' from '+name) plt.xlabel(name1+' mode index') overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlapname+'_mode'+str(a+1)+'.'+ovlapend plt.savefig(overlapname) print overlapname i+=1 if ovlaptab==1: plt.figure(figsize=(5,4)) prody.showOverlapTable(anm1,anm) plt.xlim(int(compmode01)-1,int(compmode02)) plt.ylim(int(compmode01)-1,int(compmode02)) plt.title(name1+' vs '+name+' Overlap') plt.ylabel(name1) plt.xlabel(name) overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlaptabname+'.'+ovlaptabend plt.savefig(overlapname) print overlapname except: mer=0 root.destroy() mynewtimeis = float(time.time()-start) if mynewtimeis <= 60.00: timeittook= "The calculations took %.2f s."%(mynewtimeis) elif mynewtimeis > 60.00 and mynewtimeis <= 3600.00: timeittook= "The calculations took %.2f min."%((mynewtimeis/60.00)) else: timeittook= "The calculations took %.2f hrs."%((mynewtimeis/3600.00)) print timeittook if smodel==1 and scollec==1: return (timeittook,modelfilename,str(int(prut))) elif scollec==1: return (timeittook,'nofile',str(int(prut))) elif smodel==1: return (timeittook,modelfilename,'nocoll') else: return (timeittook,'nofile','nocoll')
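The GUI routine above (corepagecalculation) strings together most of ProDy's normal mode analysis workflow. A minimal, hedged sketch of the same core pipeline without the Tkinter plumbing follows; the file name, cutoff, gamma, and mode choices are placeholders.

import prody

protein = prody.parsePDB('protein.pdb').select('protein and name CA')

anm = prody.ANM('protein')
anm.buildHessian(protein, cutoff=15.0, gamma=1.0)
anm.calcModes(20, zeros=True)

prody.writeNMD('protein_anm.nmd', anm[6:], protein)   # skip the six trivial zero modes; viewable in VMD
prody.saveModel(anm, 'protein_anm')

# Sample conformers along the first non-trivial modes and traverse one mode,
# reporting the RMSD of each generated frame to the input structure.
samples = prody.sampleModes(anm[6:9], protein, n_confs=20, rmsd=1.0)
trajectory = prody.traverseMode(anm[6], protein, n_steps=10, rmsd=1.5)
print(prody.calcRMSD(trajectory).round(2))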
def calc_rmsd_to_frame(self, frame, align=False, sel='all'):
    ag = self.ag.copy()
    ag.setACSIndex(frame)
    if align:
        prody.alignCoordsets(ag.select(sel))
    return prody.calcRMSD(ag.select(sel))
def calc(i, j):
    r = prody.calcRMSD(i, j)
    return (r, -1 * r**2)
def _do_align(self):
    self._transformation = prody.calcTransformation(
        self._prediction, self._native)
    self._transformation.apply(self._prediction)
    rmsd = prody.calcRMSD(self._native, self._prediction)
    self._align_results = RMSDAlignmentResult(rmsd)
def calc(i, j):
    """calculate RMSD"""
    mob, trans = prody.superpose(j, i)
    return prody.calcRMSD(i, mob)
def get_single_rmsd(reference, model):
    ref_backbone = reference.select('backbone or name OC2')
    mod_backbone = model.select('backbone or name OC2')
    prody.superpose(mod_backbone, ref_backbone)
    return prody.calcRMSD(mod_backbone, ref_backbone)
def find_rep_gene_iso_models(hi_res_iso_models, lo_res_iso_models, rep_rmsd_cutoff): ''' Function to pick representative iso gene models from a pool of representative pdb iso models. Uses a greedy algorithm to cover as much of the gene sequence as possible using first high resolution models and then filling any gaps with low resolution models ''' # only make a model a representative model if is at least 15 residues # long and if it includes at least 10 # residues that have never been seen in previous models or if it has # a significantly different conformation than previous models rep_gene_iso_models = [] min_length = 20 num_new_residue_cutoff = 10 rep_overlap_cutoff = 10 # tag each model with it's sequence coverage hi_res_iso_models = [[m, get_seq_range(m)] for m in hi_res_iso_models] lo_res_iso_models = [[m, get_seq_range(m)] for m in lo_res_iso_models] # sort lists of models by length of sequence coverage sorted_hi_res_iso_models = sorted(hi_res_iso_models, key=lambda m: -1 * len(m[1])) sorted_lo_res_iso_models = sorted(lo_res_iso_models, key=lambda m: -1 * len(m[1])) sorted_iso_models = sorted_hi_res_iso_models + sorted_lo_res_iso_models # use greedy algorithm to try to cover full gene sequence gene_coverage = [] # start with large hi res models, end with small lo res models for model in sorted_iso_models: model_file = model[0] model_coverage = model[1] # discrard structures that have too few number of residues if len(model_coverage) >= min_length: intersection = list(set(model_coverage) & set(gene_coverage)) num_new_residues = len(model_coverage) - len(intersection) # if rep model list is empty, make it a rep model if len(rep_gene_iso_models) == 0: rep_gene_iso_models.append(model_file) gene_coverage += model_coverage # otherwise, if this model has enough new residues, add it to the representatives list elif num_new_residues >= num_new_residue_cutoff: rep_gene_iso_models.append(model_file) gene_coverage += model_coverage gene_coverage = list(set(gene_coverage)) # otherwise check if it has a unique conformation else: model_struct = prody.parsePDB(model_file) redundant = False for rep_gene_iso_model in rep_gene_iso_models: rep_struct = prody.parsePDB( rep_gene_iso_model) # get structure # calc RMSD between model and rep alignment = prody.matchAlign(model_struct, rep_struct, overlap=rep_overlap_cutoff) if alignment != None: rmsd = prody.calcRMSD(alignment[1], alignment[2]) if rmsd <= rep_rmsd_cutoff: redundant = True # we already have a representative for this segment break # if the model does not match any of our representative models, # then it is unique - add it to the representative models list if not redundant: rep_gene_iso_models.append(model_file) gene_coverage += model_coverage gene_coverage = list(set(gene_coverage)) return rep_gene_iso_models
def score_interaction_and_dump(parsed, ifgresn, vdmresn, ifg_contact_atoms, vdm_contact_atoms, method, targetresi, cutoff, pdbix, pdbname): cutoff = float(cutoff) ifgtype, vdmtype, ifginfo, vdminfo = get_ifg_vdm(parsed, ifgresn, vdmresn, ifg_contact_atoms, vdm_contact_atoms, method) if ifgtype[1] != ['N', 'CA', 'C'] and ifgtype[1] != ['CA', 'C', 'O']: ifgresn = constants.AAname_rev[ifgtype[0]] vdmresn = constants.AAname_rev[vdmtype[0]] ifgatoms = ifgtype[1] vdmatoms = vdmtype[1] # filter for only vdmresn vdms of ifgresn with ifgatoms # and vdmatoms directly involved in interactions num_all_vdms, lookupdf = filter_contact(ifgresn, vdmresn, ifgatoms, vdmatoms) query = [] for atom in ifgatoms: query.append( parsed.select('chain {} and resnum {} and name {}'.format( ifginfo[0], ifginfo[1], atom)).getCoords()[0]) for atom in vdmatoms: query.append( parsed.select('chain {} and resnum {} and name {}'.format( vdminfo[0], vdminfo[1], atom)).getCoords()[0]) query = np.array(query) lookupcoords = pkl.load( open( '/home/gpu/Sophia/combs/st_wd/Lookups/refinedvdms/coords_of_{}.pkl' .format(ifgtype[0]), 'rb')) #lookupcoords = lookupcoords[:50] # delete ifglists = flip(ifgatoms, ifgresn) vdmlists = flip(vdmatoms, vdmresn) rmsds = [] num_atoms = len(query) coords_ls = [ item for item in lookupcoords if item[0] in lookupdf.index ] lookupatoms_to_clus = [] counter = 0 # to keep count of how many pdbs are being output for item in coords_ls: if len(item) == 3: compare_rmsds = [] ifg_vdm_ind = [] for ifg_ind, ifgls in enumerate(ifglists): for vdm_ind, vdmls in enumerate(vdmlists): lookupatoms = get_order_of_atoms( item, ifgresn, vdmresn, ifgls, vdmls) moved, transf = pr.superpose(lookupatoms, query) temp_rmsd = pr.calcRMSD(moved, query) compare_rmsds.append(temp_rmsd) ifg_vdm_ind.append([moved, temp_rmsd]) # item[0] is df index rmsds.append([item[0], min(compare_rmsds)]) # get index of which one had min rmsd for which_ind, each in enumerate(ifg_vdm_ind): if each[1] == min(compare_rmsds): lookupatoms_to_clus.append(each[0]) ######################################################################## # output pdb if low rmsd ######################################################################## if each[1] < cutoff and counter < 30 and which_ind == 0: # this is to ensure rmsd is below cutoff when not flipped # bc don't want to take care of that in prody to output pdb row = lookupdf.loc[item[0]] try: db_dir = '/home/gpu/Sophia/STcombs/20171118/database/reduce/' par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb') except: db_dir = '/home/gpu/Sophia/combs/st_wd/20180207_db_molprobity_biolassem/' par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb') ifgchid, ifgresnum = row['chid_ifg'], row[ 'resnum_ifg'] vdmchid, vdmresnum = row['chid_vdm'], row[ 'resnum_vdm'] printout = copy.deepcopy(par) printout = printout.select( '(chain {} and resnum {}) or (chain {} and resnum {})' .format(ifgchid, ifgresnum, vdmchid, vdmresnum)) printout.select('chain {} and resnum {}'.format( ifgchid, ifgresnum)).setChids('Y') printout.select('chain {} and resnum {}'.format( vdmchid, vdmresnum)).setChids('X') printout.select('all').setResnums(10) printout_interactamer = [] integrin_interactamer = [] try: # skip the ones that have segment ids. 
will prob need to update this # for the newly combed stuff for atom in ifgatoms: integrin_interactamer.append( parsed.select( 'chain {} and resnum {} and name {}' .format(ifginfo[0], ifginfo[1], atom))) printout_interactamer.append( printout.select( 'chain Y and resnum 10 and name {}' .format(atom))) for atom in vdmatoms: integrin_interactamer.append( parsed.select( 'chain {} and resnum {} and name {}' .format(vdminfo[0], vdminfo[1], atom))) printout_interactamer.append( printout.select( 'chain X and resnum 10 and name {}' .format(atom))) integrin_interactamer_prody = [] integrin_interactamer = sum( integrin_interactamer[1:], integrin_interactamer[0]) printout_interactamer = sum( printout_interactamer[1:], printout_interactamer[0]) try: assert len(integrin_interactamer) == len( printout_interactamer) interact_res = printout.select( '(chain X and resnum 10) or (chain Y and resnum 10)' ) interactamer_transf = pr.applyTransformation( transf, printout_interactamer) outdir = './output_data/pdbfiles/' threecode = constants.AAname[ifgresn] pr.writePDB( outdir + '{}_{}_{}_{}{}_{}{}_{}_{}'.format( pdbix, pdbname, targetresi, ifginfo[1], ifgresn, vdminfo[1], vdmresn, cutoff, row.name), interactamer_transf) counter += 1 except: pass except: traceback.print_exc() pass else: rmsds.append([int(item[0]), 100000]) # count how many NNs the query intrxn has num_nn, norm_metrics = get_NN(lookupatoms_to_clus, num_atoms, rmsds, query, cutoff, num_all_vdms) print('num NN') print(num_nn) exp_list = norm_metrics[-1] print('======= FOR NEAREST NEIGHBORS ==========') print('avg with single') print(exp_list[0]) print('avg without single') print(exp_list[1]) print('median with single') print(exp_list[2]) print('median without single') print(exp_list[3]) # do greedy clustering D = make_pairwise_rmsd_mat( np.array(lookupatoms_to_clus).astype('float32')) D = make_square(D) adj_mat = make_adj_mat(D, 0.5) mems, centroids = greedy(adj_mat) print('======= FOR GREEDY CLUS ==========') print('avg with singletons') print(np.mean([len(x) for x in mems])) print('avg without singletons') print(np.mean([len(x) for x in mems if len(x) > 1])) print('median with singletons') print(np.median([len(x) for x in mems])) print('median without singletons') print(np.median([len(x) for x in mems if len(x) > 1])) return ifginfo[0], ifginfo[1], ifgresn, vdminfo[0], vdminfo[1],\ vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics
def calcANMPathway( pdb_a, pdb_b, k=0.1, r_c=15, U0_a=0, U0_b=0, sa=0.8, sb=0.4, t_rmsd=0.1, tol=10 ** (-4), crit_rmsd=1, m=100, max_iter=100, ): import numpy as np import prody as pd import scipy as sci import scipy.spatial as sp def calc_dU(coords, coords_ref, cutoff=r_c, k=k): gnm = pd.GNM() gnm.buildKirchhoff(coords_ref, cutoff, k) kirchhoff = gnm.getKirchhoff() np.fill_diagonal(kirchhoff, 0) kirchhoff = abs(kirchhoff) n_atom = coords.shape[0] xi = coords[:, 0] yi = coords[:, 1] zi = coords[:, 2] xj = coords_ref[:, 0] yj = coords_ref[:, 1] zj = coords_ref[:, 2] xi, xj = np.meshgrid(xi, xj) yi, yj = np.meshgrid(yi, yj) zi, zj = np.meshgrid(zi, zj) mag = np.sqrt(np.square(xi - xj) + np.square(yi - yj) + np.square(zi - zj)) np.fill_diagonal(mag, -1) D = sp.distance.squareform(sp.distance.pdist(coords, metric="euclidean")) D0 = sp.distance.squareform(sp.distance.pdist(coords_ref, metric="euclidean")) dU = np.multiply(kirchhoff, D - D0) dU = dU / np.max(abs(dU)) dUx = np.multiply(dU, np.divide(xi - xj, mag)) dUy = np.multiply(dU, np.divide(yi - yj, mag)) dUz = np.multiply(dU, np.divide(zi - zj, mag)) # dUx = np.nansum(sci.triu(dUx)) # dUy = np.nansum(sci.triu(dUy)) # dUz = np.nansum(sci.triu(dUz)) dUx = np.sum(dUx, axis=1) / dU.shape[1] dUy = np.sum(dUy, axis=1) / dU.shape[1] dUz = np.sum(dUz, axis=1) / dU.shape[1] return dUx, dUy, dUz def findCuspStruct(pdb_a, pdb_b, ensemble_ref, m=m): ensemble = pd.Ensemble() ensemble.setAtoms(pdb_a) ensemble.setCoords(pdb_a.getCoords()) conf_i = pdb_a.copy() conf_f = pdb_b.copy() conf_f, T = pd.superpose(conf_f, conf_i) v = conf_f.getCoords() - conf_i.getCoords() for i in np.linspace(0, 1, m): q = i p = 1 - q coords = (p * v) + conf_i.getCoords() ensemble.addCoordset(coords) E_trans = calcMultiStateEnergy(ensemble, ensemble_ref, cutoff=r_c, k=k) E_trans = E_trans / np.max(E_trans) diff_E = abs(E_trans[0, :] - E_trans[1, :]) ind_trans = np.argmin(diff_E) coords = ensemble[ind_trans].getCoords() return (coords, diff_E[ind_trans]) def minimize(coords, coords_ref, s, cutoff=r_c, k=k, U0=None): dUx, dUy, dUz = calc_dU(coords, coords_ref, cutoff=cutoff, k=k) dx = np.multiply(s, dUx) dy = np.multiply(s, dUy) dz = np.multiply(s, dUz) # print '\tMoving coordinates max <%f, %f, %f>'%(np.max(abs(dx)),np.max(abs(dy)),np.max(abs(dz))) x = coords[:, 0] - dx y = coords[:, 1] - dy z = coords[:, 2] - dz newcoords = np.zeros(coords.shape) newcoords[:, 0] = x newcoords[:, 1] = y newcoords[:, 2] = z return newcoords # Instantiate containers for data # pdb_b, junk = pd.superpose(pdb_b, pdb_a) pdb_container_a = pdb_a.copy() pdb_container_b = pdb_b.copy() pdb_trans = pdb_a.copy() path_a = pd.Ensemble("Path from transition to state A") path_b = pd.Ensemble("Path from transition to state B") path = pd.Ensemble("Transition Path") path_a.setAtoms(pdb_a) path_b.setAtoms(pdb_b) path.setAtoms(pdb_trans) # path_a.setCoords(pdb_a) # path_b.setCoords(pdb_b) ensemble_ref = pd.Ensemble() ensemble_ref.setAtoms(pdb_a) ensemble_ref.addCoordset(pdb_a) ensemble_ref.addCoordset(pdb_b) # Interpolate coordinates print "Searching for initial transition state." coords_trans_i, E_trans_i = findCuspStruct(pdb_container_a, pdb_container_b, ensemble_ref) # Search for transition state print "Minimizing transition state." 
coords_trans_f = coords_trans_i E_trans_f = E_trans_i counter = np.zeros(1) while (counter < max_iter) and (E_trans_f > tol): counter += 1 coords_trans_a = minimize(coords_trans_f, pdb_a.getCoords(), s=sa) coords_trans_b = minimize(coords_trans_f, pdb_b.getCoords(), s=sb) pdb_container_a.setCoords(coords_trans_a) pdb_container_b.setCoords(coords_trans_b) coords_trans_f, E_trans_f = findCuspStruct(pdb_container_a, pdb_container_b, ensemble_ref) print "\tBeginning iteration %d, dE=%f" % (counter, E_trans_f) pdb_trans.setCoords(coords_trans_f) # Find path from transition state to reference state A, using steepest descent print "Finding paths of steepest descent from transition state." counter = np.zeros(1) rmsd = pd.calcRMSD(pdb_a.getCoords(), pdb_trans.getCoords()) pdb_container_a.setCoords(pdb_trans) while (counter < max_iter) and (rmsd > crit_rmsd): counter += 1 path_a.addCoordset(minimize(pdb_container_a.getCoords(), pdb_a.getCoords(), s=sa)) pdb_container_a.setCoords(path_a[-1]) rmsd = pd.calcRMSD(pdb_a.getCoords(), pdb_container_a.getCoords()) print "RMSD (path A): %f" % (rmsd) # Find path from transition state to reference state B, using steepest descent counter = np.zeros(1) rmsd = pd.calcRMSD(pdb_b.getCoords(), pdb_trans.getCoords()) pdb_container_b.setCoords(pdb_trans) while (counter < max_iter) and (rmsd > crit_rmsd): counter += 1 path_b.addCoordset(minimize(pdb_container_b.getCoords(), pdb_b.getCoords(), s=sb)) pdb_container_b.setCoords(path_b[-1]) rmsd = pd.calcRMSD(pdb_b.getCoords(), pdb_container_b.getCoords()) print "RMSD (path B): %f" % (rmsd) # Stitch together frames of path in proper order for i in reversed(xrange(0, len(path_a))): path.addCoordset(path_a[i].getCoords()) path.addCoordset(pdb_trans.getCoords()) for i in xrange(0, len(path_b)): path.addCoordset(path_b[i].getCoords()) print "Transition path calculation complete!" return (path, pdb_trans)
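A minimal, hedged sketch of the interpolation idea used by findCuspStruct above: superpose state B onto state A, walk linearly between the two coordinate sets, and score every frame by its RMSD to state A. File names and the CA selection are placeholders, and both states are assumed to contain matching atoms.

import numpy as np
import prody

pdb_a = prody.parsePDB('state_a.pdb').select('name CA').copy()
pdb_b = prody.parsePDB('state_b.pdb').select('name CA').copy()
pdb_b, transformation = prody.superpose(pdb_b, pdb_a)

path = prody.Ensemble('interpolated path')
path.setAtoms(pdb_a)
path.setCoords(pdb_a.getCoords())
displacement = pdb_b.getCoords() - pdb_a.getCoords()
for frac in np.linspace(0.0, 1.0, 50):
    path.addCoordset(pdb_a.getCoords() + frac * displacement)
print(prody.calcRMSD(path))   # RMSD of each interpolated frame to state A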
pca_FN = os.path.join('prody.pca.npz')
if os.path.exists(pca_FN):
    pca = prody.loadModel(pca_FN)
else:
    pca = prody.PCA()
    pca.buildCovariance(ensemble)  # Build covariance matrix
    pca.calcModes()                # Calculate modes
    prody.saveModel(pca, filename=pca_FN[:-8])

if not os.path.isdir('figures'):
    os.makedirs('figures')

import matplotlib.pyplot as plt

if not os.path.isfile('rmsd.png'):
    rmsd = prody.calcRMSD(ensemble)
    plt.clf()
    plt.plot(rmsd)
    plt.xlabel('Conformation index')
    plt.ylabel('RMSD (A)')
    plt.title('RMSD %f (%f)' % (rmsd.mean(), rmsd.std()))
    plt.savefig('figures/rmsd.png')

if not os.path.isfile('blastPCA.png'):
    pc_ind0 = 0
    pc_ind1 = 1
    xtal_projection = prody.calcProjection(ensemble, pca[:20], rmsd=False)
    plt.clf()
    plt.plot(xtal_projection[:, pc_ind0], xtal_projection[:, pc_ind1], 'ks')
    # titles = ['%s%s' % (pdb_id, chain_id) for (pdb_id, chain_id) in chain_hits]
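The snippet above assumes an ensemble has already been built and superposed. A hedged sketch of one way such an ensemble could be prepared with ProDy; the paths, the CA selection, and the equal-atom-count shortcut are assumptions (structures with differing residues would need chain mapping instead).

import glob
import prody

reference = prody.parsePDB('reference.pdb').select('name CA')
ensemble = prody.PDBEnsemble('blast hits')
ensemble.setAtoms(reference)
ensemble.setCoords(reference.getCoords())
for pdb_file in glob.glob('hits/*.pdb'):
    hit = prody.parsePDB(pdb_file).select('name CA')
    if hit is not None and hit.numAtoms() == reference.numAtoms():
        ensemble.addCoordset(hit.getCoords())
ensemble.iterpose()   # iterative superposition onto the mean structure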
def sidechains_rmsd_calculator(pdb_target, pdb_reference, res_file=False, area=False,
                               write2report=False, ligand_chain="L"):
    """
    :param pdb_target: problem pdb file
    :param pdb_reference: reference pdb file
    :param area: radius (in Angstroms) of the selection sphere around the ligand
    :param res_file: file listing the residue numbers to compare
    :param write2report: if set, path of the report file to write
    :param ligand_chain: name of the chain of the ligand
    :return: superposes the backbone of the pdb_target onto the pdb_reference and
             computes the RMSD for each side chain in the selected area
    """
    target, reference = superimpose_backbones(pdb_target, pdb_reference)
    if area:
        print("Selection set of {} Angstroms".format(area))
        selected_area_target = reference.select(
            "protein and (within {} of chain {})".format(area, ligand_chain))
        unique_residues_target = sorted(set(selected_area_target.getResnums()))
    elif res_file:
        aminoacids_list = read_selecteds_from_file(res_file)
        print("Searching the following amino acids: {}".format(aminoacids_list))
        selected_area_target = reference.select("resnum {}".format(
            ' '.join(aminoacids_list)))
        unique_residues_target = sorted(set(selected_area_target.getResnums()))
    else:
        print("Please, set an input file or a radius to determine which amino acids "
              "will be used to compute the RMSD.")
    list_of_results = []
    for residue_target in unique_residues_target:
        res_selected_target = target.select(
            "protein and resnum {} and heavy".format(residue_target))
        res_selected_reference = reference.select(
            "protein and resnum {} and heavy".format(residue_target))
        target_CA = target.select(
            "protein and resnum {} and name CA".format(residue_target))
        reference_CA = reference.select(
            "protein and resnum {} and name CA".format(residue_target))
        try:
            RMSD = prody.calcRMSD(res_selected_reference, res_selected_target)
            distance_bet_CA = prody.calcRMSD(reference_CA, target_CA)
        except:
            print("ERROR because different number of atoms in residue {}".format(
                residue_target))
            print("ATOMS of the TARGET: {}".format(res_selected_target.getNames()))
            print("ATOMS of the REFERENCE: {}".format(res_selected_reference.getNames()))
            continue  # skip residues that cannot be compared
        residue_information = (residue_target, res_selected_target.getResnames()[0],
                               RMSD, distance_bet_CA)
        list_of_results.append(residue_information)
        print(residue_information)
    if write2report:
        filename = write2report
        with open(filename, "w") as report:
            for result in list_of_results:
                report.write("{:4d}\t{}\t{:5.3f}\t{:5.3f}\t{:5.3f}\n".format(
                    result[0], result[1], float(result[2]), float(result[3]),
                    (float(result[2]) - float(result[3]))))
def calc(i, j):
    """calculate RMSD"""
    return prody.calcRMSD(i, j)
def get_rmsds_to_reference(ensemble):
    """ Gets RMSD of each structure to the reference """
    return pd.calcRMSD(ensemble)
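For reference, calcRMSD also accepts bare coordinate arrays, and when called on an ensemble (as above) it returns one RMSD per conformation, measured against the ensemble's reference coordinates. A tiny, hedged illustration with made-up coordinates:

import numpy as np
import prody

ref = np.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0], [3.0, 0.0, 0.0]])
mob = ref + np.array([0.0, 0.5, 0.0])       # rigid 0.5 A shift along y
print(prody.calcRMSD(ref, mob))             # prints 0.5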
def alignment_monstrosity(self, rmsd_cutoff=0.5, use_local_pdb_database=False, verify_substructure=True): """ Consequences of not thinking ahead... For each fragment, align all fragment-containing ligands to fragment Generate PDBs with aligned coordinate systems :param args: :param rmsd_cutoff: fragment alignment RMSD cutoff, anything higher gets rejected :return: """ # Create directory for processed PDBs rejected_dict = self.load_previously_rejected_pdbs() # Create directories... if not use_local_pdb_database: os.makedirs(self.pdb_bank_dir, exist_ok=True) os.makedirs(self.processed_PDBs_path, exist_ok=True) # If use_local_pdb_database=False, use PDB FTP to download all structures # Otherwise, all relevant structures should be found in the local PDB database if not use_local_pdb_database: prody.pathPDBFolder(folder=self.pdb_bank_dir) for current_fragment in self.pdb_ligand_json: # Only download PDBs that aren't already in PDB bank directory existing_PDBs = [ pdb[:4].lower() for pdb in os.listdir(self.pdb_bank_dir) ] PDBs_to_download = list( set(self.pdb_ligand_json[current_fragment]['PDBs']) - set(existing_PDBs)) if len(PDBs_to_download) > 0: print(f'Downloading PDBs for {current_fragment}...\n') prody.fetchPDBviaFTP(*PDBs_to_download) else: print( f'All relevant PDBs for {current_fragment} found in {self.pdb_bank_dir}!\n' ) # Fragment_1, Fragment_2, ... for current_fragment in self.pdb_ligand_json: # Create directory for processed PDBs processed_dir = os.path.join(self.processed_PDBs_path, current_fragment) processed_dir_exists = os.path.exists(processed_dir) os.makedirs(processed_dir, exist_ok=True) # Get list of already processed PDBs for current_fragment already_processed_pdbs = [ file[:4].lower() for file in os.listdir(processed_dir) ] # Save ideal_ligand_containers for each fragment so things are only downloaded once ideal_ligand_dict = dict() ideal_ligand_dict['Ligands'] = dict() ideal_ligand_dict['Failed'] = list() # Align_PDB class holds all information for the current fragment align = Align_PDB(self.user_defined_dir, current_fragment, self.sanitized_smiles_dict[current_fragment], verify_substructure=verify_substructure) # Get PDB IDs that are viable for extracting protein-fragment contacts reject_pdbs = rejected_dict[ current_fragment] if current_fragment in rejected_dict.keys( ) else list() if not processed_dir_exists: reject_pdbs = list() reject_pdbs.append('3k87') # DEBUGGING viable_pdbs = list( set(self.pdb_ligand_json[current_fragment]['PDBs']) - set(reject_pdbs) - set(already_processed_pdbs)) # For each PDB containing a fragment-containing compound for pdbid in viable_pdbs: # Return path of PDB file to use for processing found_pdb, pdb_path = self.return_PDB_to_use_for_alignments( pdbid, use_local_pdb_database=use_local_pdb_database) if not found_pdb: print(f'Cannot find {pdbid}!') continue # Proceed with processing if the current PDB passes all filters print("\n\nProcessing {}...".format(pdbid)) # --- Check which ligands contain relevant fragments --- # relevant_ligands = self.return_substructure_containing_ligands( pdb_path, self.pdb_ligand_json, current_fragment) # Set things up! 
Get ligands from Ligand Expo if haven't already tried and failed for ligand in relevant_ligands: if not ideal_ligand_dict['Ligands'].get( ligand ) and ligand not in ideal_ligand_dict['Failed']: ideal_ligand_container = Ideal_Ligand_PDB_Container( ligand) if ideal_ligand_container.success: ideal_ligand_dict['Ligands'][ ligand] = ideal_ligand_container else: ideal_ligand_dict['Failed'].append(ligand) # Create a temp list for ligands that will be pulled from the current PDB ligand_container_dict_for_current_pdb = { lig: ideal_ligand_dict['Ligands'][lig] for lig in ideal_ligand_dict['Ligands'] if lig in relevant_ligands } relevant_ligands_prody_dict = align.extract_ligand_records( pdb_path, ligand_container_dict_for_current_pdb) # Reject if no ligands with all atoms represented can be found for the given PDB if len(relevant_ligands_prody_dict) < 1: if current_fragment in rejected_dict.keys(): rejected_dict[current_fragment].append(pdbid) else: rejected_dict[current_fragment] = [pdbid] print( 'REJECTED - no target ligands were fully represented in the PDB' ) continue # --- Perform alignment of PDB fragment substructure (mobile) onto defined fragment (target) --- # # ...if PDB has not been processed, rejected, or excluded by the user else: # Iterate over ligands found to contain fragments as substructures for ligand_resname, ligand_chain, ligand_resnum in relevant_ligands_prody_dict: # Mapping of fragment atoms to target ligand atoms target_ligand_ideal_smiles = ligand_container_dict_for_current_pdb[ ligand_resname].smiles # todo: catch ligands with missing SMILES strings earlier... if target_ligand_ideal_smiles is None: continue target_ligand_pdb_string = io.StringIO() target_ligand_prody = relevant_ligands_prody_dict[( ligand_resname, ligand_chain, ligand_resnum)].select('not hydrogen') prody.writePDBStream(target_ligand_pdb_string, target_ligand_prody) mapping_successful, fragment_target_map = align.fragment_target_mapping( target_ligand_ideal_smiles, target_ligand_pdb_string) if not mapping_successful: if current_fragment in rejected_dict.keys(): rejected_dict[current_fragment].append(pdbid) else: rejected_dict[current_fragment] = [pdbid] print( 'REJECTED - failed atom mapping between target and reference fragment' ) continue print( f'\n{len(fragment_target_map)} possible mapping(s) of fragment onto {pdbid}:{ligand} found...\n' ) # Iterate over possible mappings of fragment onto current ligand rmsd_success = False for count, mapping in enumerate(fragment_target_map): # todo: refactor to use RDKit's atom.GetMonomerInfo() for atom selections... # Determine translation vector and rotation matrix target_coords_and_serials, frag_atom_coords, transformation_matrix = align.determine_rotation_and_translation( mapping, target_ligand_prody) trgt_atom_coords, target_fragment_atom_serials = target_coords_and_serials # Apply transformation to protein_ligand complex if rmsd if below cutoff # Use information from PubChem fragment SMILES in determining correct mappings # Actually, map fragment onto source ligand and use valence information to determine correct mappings rmsd = prody.calcRMSD( frag_atom_coords, prody.applyTransformation( transformation_matrix, trgt_atom_coords)) print( 'RMSD of target onto reference fragment:\t{}'. 
format(rmsd)) if rmsd < rmsd_cutoff: transformed_pdb = align.apply_transformation( pdb_path, ligand_resnum, target_fragment_atom_serials, transformation_matrix) # Continue if transformed_pdb - ligand is None if transformed_pdb.select( f'not (resname {ligand_resname})' ) is None: continue transformed_pdb_name = f'{pdbid}_{ligand_resname}_{ligand_chain}_{ligand_resnum}-{count}.pdb' prody.writePDB( os.path.join(processed_dir, transformed_pdb_name), transformed_pdb) rmsd_success = True else: print( 'REJECTED - high RMSD upon alignment to reference fragment' ) if rmsd_success is False: if current_fragment in rejected_dict.keys(): rejected_dict[current_fragment].append(pdbid) else: rejected_dict[current_fragment] = [pdbid] # Remember rejected PDBs with open(self.rejected_dict_pickle, 'wb') as reject_pickle: pickle.dump(rejected_dict, reject_pickle)
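The accept/reject logic above boils down to superposing the mapped target-ligand atoms onto the reference fragment and keeping the hit only when the RMSD clears the cutoff. A hedged sketch of that step on plain coordinate arrays, using calcTransformation in place of the project's own mapping helper; frag_coords, target_coords, and the cutoff are hypothetical.

import prody

def fragment_alignment_passes(frag_coords, target_coords, rmsd_cutoff=0.5):
    # Least-squares fit of the mapped target atoms onto the reference fragment atoms.
    transformation = prody.calcTransformation(target_coords, frag_coords)
    moved = prody.applyTransformation(transformation, target_coords)
    rmsd = prody.calcRMSD(frag_coords, moved)
    return rmsd < rmsd_cutoff, transformation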
import sys
import argparse
import prody
from TMalign import TMalign

if __name__ == '__main__':
    p = argparse.ArgumentParser(description="L-RMS calculator")
    p.add_argument('reference_PDBfile')
    p.add_argument('model_PDBfile')
    p.add_argument('-r', '--ref_receptor', default='A', help='chain name of reference receptor')
    p.add_argument('-l', '--ref_ligand', default='B', help='chain name of reference ligand')
    p.add_argument('-R', '--model_receptor', default='A', help='chain name of model receptor')
    p.add_argument('-L', '--model_ligand', default='B', help='chain name of model ligand')
    p.add_argument('--tmalign', default='/usr/local/bin/TMalign', help='path to TMalign')
    args = p.parse_args()

    ref_receptor = prody.parsePDB(args.reference_PDBfile, chain=args.ref_receptor)
    ref_ligand = prody.parsePDB(args.reference_PDBfile, chain=args.ref_ligand)
    model_receptor = prody.parsePDB(args.model_PDBfile, chain=args.model_receptor)
    model_ligand = prody.parsePDB(args.model_PDBfile, chain=args.model_ligand)

    tmalign = TMalign(model_receptor, ref_receptor, path=args.tmalign)
    trans = prody.Transformation(tmalign.matrix, tmalign.vector)
    trans.apply(model_ligand)
    lrms = prody.calcRMSD(model_ligand, ref_ligand)
    print lrms
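A hedged sketch of the same ligand-RMSD (L-RMS) idea using ProDy's least-squares superposition in place of TMalign; this is a deliberate simplification that assumes the receptor chains contain identical atoms, and the paths and chain IDs are placeholders.

import prody

ref = prody.parsePDB('reference.pdb')
model = prody.parsePDB('model.pdb')
ref_receptor, ref_ligand = ref.select('chain A'), ref.select('chain B')
model_receptor, model_ligand = model.select('chain A'), model.select('chain B')

# Fit the model receptor onto the reference receptor, apply the same transformation
# to the whole model (receptor and ligand move together), then measure the ligand RMSD.
transformation = prody.calcTransformation(model_receptor, ref_receptor)
transformation.apply(model)
lrms = prody.calcRMSD(ref_ligand, model_ligand)
print('L-RMS: %.2f A' % lrms)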
def _do_align(self):
    self._transformation = prody.calcTransformation(self._prediction,
                                                    self._native)
    self._transformation.apply(self._prediction)
    rmsd = prody.calcRMSD(self._native, self._prediction)
    self._align_results = RMSDAlignmentResult(rmsd)
def prune_pdb_models(pdb_models): ''' This function takes a list of structural models corresponding to a single pdb ID (just isolated models). It prunes them to find representative models and eliminates redundant ones Arguments: pdb_models -- full list of pdb models (iso) Returns: pruned_models -- list of pruned representative pdb models ''' pruned_models = [] # determine which files actually exist, delete parent dirs of those that don't iso_pdb_models = [] for model in pdb_models: if not os.path.exists(model): print os.path.basename( model), 'does not exist! Deleting parent directory.' delete_model(model) else: iso_pdb_models.append(model) # find representative models rep_overlap_cutoff = 50 # percent seq overlap required (90% seq ID required) rep_rmsd_cutoff = 5 # models less than 4A apart are represented by a single model # find representative iso models print 'Finding representative PDB ISO models...' rep_iso_models = [] for iso_model in iso_pdb_models: if len(rep_iso_models) == 0: rep_iso_models.append(iso_model) else: model = prody.parsePDB(iso_model) # get structure redundant = False for rep_iso_model in rep_iso_models: rep = prody.parsePDB(rep_iso_model) # get structure # calc RMSD between model and rep alignment = prody.matchAlign(model, rep, overlap=rep_overlap_cutoff) if alignment != None: rmsd = prody.calcRMSD(alignment[1], alignment[2]) if rmsd <= rep_rmsd_cutoff: redundant = True # we already have a representative for this segment # take the larger structure as the representative if model.numResidues() > rep.numResidues(): rep_iso_models.remove(rep_iso_model) rep_iso_models.append(iso_model) break # if the iso model does not match any of our representative models, # then add it to the representative models list if not redundant: rep_iso_models.append(iso_model) print 'Found', len(rep_iso_models), 'representative ISO models:', map( os.path.basename, rep_iso_models) # move representative models to their own directory if len(rep_iso_models) > 0: pdb_dir = os.path.abspath( os.path.join(rep_iso_models[0], os.pardir + '/' + os.pardir)) rep_model_dir = pdb_dir + '/representative_pdb_models/' if os.path.exists(rep_model_dir): shutil.rmtree(rep_model_dir) os.mkdir(rep_model_dir) for rep_iso_model in rep_iso_models: rep_iso_model_pardir = os.path.abspath( os.path.join(rep_iso_model, os.pardir)) new_path = rep_model_dir + '/' + os.path.basename( rep_iso_model_pardir) shutil.copytree(rep_iso_model_pardir, new_path) # define new pathname to keep track of the models once we move them new_iso_model_path = rep_model_dir + os.path.basename( rep_iso_model_pardir) + '/' + os.path.basename(rep_iso_model) pruned_models.append(new_iso_model_path) # return all representative pdb models return pruned_models
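A minimal, hedged sketch of the redundancy test used in prune_pdb_models above, with file paths as placeholders: two models are considered redundant when matchAlign finds a chain match at the required sequence overlap and the superposed match lies within the RMSD cutoff.

import prody

def is_redundant(model_path, rep_path, overlap=50, rmsd_cutoff=5.0):
    model = prody.parsePDB(model_path)
    rep = prody.parsePDB(rep_path)
    alignment = prody.matchAlign(model, rep, overlap=overlap)
    if alignment is None:
        return False
    # alignment[1] and alignment[2] are the matched atom maps of model and rep.
    return prody.calcRMSD(alignment[1], alignment[2]) <= rmsd_cutoff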
def generate_fuzzball_contact_rotamersets(ligand_conformer_path, match_path, match_pose, sfxn, match_residue_map, flag_special_rot=True, custom_taskop=None, rotset_limit=200, contact_method='RMSD', RMSD_limit=1.5, apply_minimization=False, dump_rotamerset_pdb=False, report_stats=False, defined_positions=None): """ Generate rotamers that recapitulate observed fuzzball contacts for each position in a nucleated match :param ligand_conformer_path: path to ligand generated by molfile_to_params.py :param flag_special_rot: If true, flag rotamers as SPECIAL_ROT variants :param custom_taskop: list of task operations to apply to the PackerTask used to generate rotamers :return: viable_rotamers dictionary of rotamers organized by position and residue identity """ sfxn_weights = sfxn.weights() conformer_resnum = match_pose.size( ) # Assumes single ligand appended to end of sequence if contact_method not in ['RMSD', 'matcher']: raise Exception( 'Contact method needs to be one of the following: "RMSD", "matcher"' ) # --- Find and store viable rotamers --- # viable_rotamers = dict() rotamer_stats = dict() # Setting things up is going to mess up the match pose, so use a clone match_pose_clone = match_pose.clone() sfxn(match_pose_clone) # --- Transform match pose clone onto fuzzball conformer --- # """Required for contact coordsets to make sense""" # Get ligand from match, always last residue # todo: select chain X, ligand is always chain X match_pose_size = match_pose_clone.size() match_ligand = match_pose_clone.residue(match_pose_size) # Get match positions if they exist motif_resnums = list() with open(match_path, 'r') as my_match: for line in my_match: if line.startswith('REMARK 666 MATCH TEMPLATE'): motif_resnums.append(int(line.split()[11])) motif_and_ligand_resnums = motif_resnums + [conformer_resnum] # Keep track of match positions and compatible residue identites # match_residue_map = {position: dict() for position in range(1, match_pose.size())} # Assumes one ligand appended to end of sequence # Import conformer from pose fuzzball_ligand_pose = rosetta.core.pose.Pose() rosetta.core.import_pose.pose_from_file(fuzzball_ligand_pose, ligand_conformer_path) fuzzball_ligand = fuzzball_ligand_pose.residue(1) # Calculate rotation/translation by hand using first three atoms of ligand mobile_match = rosetta.numeric.xyzTransform_double_t( match_ligand.xyz(1), match_ligand.xyz(2), match_ligand.xyz(3)) mobile_match_inverse = mobile_match.inverse() target_fuzzball = rosetta.numeric.xyzTransform_double_t( fuzzball_ligand.xyz(1), fuzzball_ligand.xyz(2), fuzzball_ligand.xyz(3)) ligand_rotation = target_fuzzball.R * mobile_match_inverse.R ligand_translation = target_fuzzball.R * mobile_match_inverse.t + target_fuzzball.t # Apply transformation match_pose_clone.apply_transform_Rx_plus_v(ligand_rotation, ligand_translation) match_pose_clone_ligand = match_pose_clone.residue(match_pose_size).clone() # --- All other operations --- # # Mutate all non-motif residues within 10A from ligand to ALA, interferes with RotamerSet generation ligand_residue_selector = rosetta.core.select.residue_selector.ChainSelector( 'X') neighborhood_selector = rosetta.core.select.residue_selector.NeighborhoodResidueSelector( ligand_residue_selector, 10, False) neighborhood_selector_bool = neighborhood_selector.apply(match_pose_clone) neighborhood_residues_resnums = rosetta.core.select.get_residues_from_subset( neighborhood_selector_bool) positions_to_consider = list( set(neighborhood_residues_resnums) - set(motif_and_ligand_resnums)) mutate 
= rosetta.protocols.simple_moves.MutateResidue() mutate.set_res_name('ALA') for position in positions_to_consider: if match_pose_clone.residue(position).name3() not in [ 'GLY', 'PRO' ] and 'disulfide' not in match_pose_clone.residue(position).name(): mutate.set_target(position) mutate.apply(match_pose_clone) # Build RotamerSets for each extrachi/sample level if dump_rotamerset_pdb: all_rotamersets = rosetta.core.pack.rotamer_set.RotamerSetsFactory.create_rotamer_sets( match_pose_clone) task_factory = rosetta.core.pack.task.TaskFactory() # NATRO positions TaskOp rotamer_candidates_rs = rosetta.core.select.residue_selector.ResidueIndexSelector( ','.join([str(i) for i in match_residue_map.keys()])) natro_rs = rosetta.core.select.residue_selector.NotResidueSelector( rotamer_candidates_rs) natro_op = rosetta.core.pack.task.operation.OperateOnResidueSubset( rosetta.core.pack.task.operation.PreventRepackingRLT(), natro_rs) task_factory.push_back(natro_op) rotamersets_packer_task = task_factory.create_task_and_apply_taskoperations( match_pose_clone) all_rotamersets.set_task(rotamersets_packer_task) # Remove ligand from match_pose_clone before generating rotamers!!! match_pose_clone_apo = match_pose_clone.clone() match_pose_clone_apo.conformation_ptr().delete_residue_slow( match_pose_size) # Define positions where rotamers will be considered if defined_positions: rotamerset_positions = list( set(defined_positions) & set(match_residue_map.keys())) else: rotamerset_positions = list(match_residue_map.keys()) print(f'Rotamerset Positions: {rotamerset_positions}') # Generate rotamers at each position for position in rotamerset_positions: # Prepare minimization if apply_minimization: motif_movemap = rosetta.core.kinematics.MoveMap() motif_movemap.set_chi(position, True) minimize_motif = rosetta.protocols.minimization_packing.MinMover() minimize_motif.movemap(motif_movemap) minimize_motif.score_function(sfxn) minimize_motif.min_type('lbfgs_armijo') minimize_motif.tolerance(1e-6) # Prepare infrastructure rotamer_stats[position] = dict() if dump_rotamerset_pdb: current_rotamerset = rosetta.core.pack.rotamer_set.RotamerSetFactory.create_rotamer_set( match_pose_clone) # Keep rotamers that are compatible with minimal binding motif for contact_residue in match_residue_map[position]: # print(f'Considering position {position}: {contact_residue}') position_rotamer_list = list() possible_contact_geometries = match_residue_map[position][ contact_residue] # --- Prepare viable rotamers for each position --- # # Define packertask using neighborhood_selector packer_task = rosetta.core.pack.task.TaskFactory.create_packer_task( match_pose_clone_apo) packer_task.initialize_from_command_line() # Get boolean vector for packable positions and apply to packer task packable_positions = rosetta.utility.vector1_bool() packable_position_list = [ True if i == position else False for i in range(1, match_pose_clone_apo.size()) ] for bool_value in packable_position_list: packable_positions.append(bool_value) packer_task.restrict_to_residues(packable_positions) # Only build rotamers for residues with Hbond donors/acceptors restrict_CAAs = rosetta.core.pack.task.operation.RestrictAbsentCanonicalAAS( position, rosetta.utility.vector1_bool(20)) restrict_CAAs.keep_aas(contact_residue) restrict_CAAs.apply(match_pose_clone_apo, packer_task) packer_neighbor_graph = rosetta.core.pack.create_packer_graph( match_pose_clone_apo, sfxn, packer_task) match_rotamer_set = rosetta.core.pack.rotamer_set.RotamerSetFactory.create_rotamer_set( 
match_pose_clone_apo) match_rotamer_set.set_resid(position) match_rotamer_set.build_rotamers(match_pose_clone_apo, sfxn, packer_task, packer_neighbor_graph, use_neighbor_context=False) if match_rotamer_set.num_rotamers( ) <= 1 and match_rotamer_set.rotamer(1).name1() != contact_residue: continue print( f'Position {position} ResidueType {contact_residue} - comparing {match_rotamer_set.num_rotamers()} rotamers against {len(possible_contact_geometries)} contact modes' ) rotamer_stats[position][contact_residue] = dict() rotamer_stats[position][contact_residue][ 'num_rotamers'] = match_rotamer_set.num_rotamers() rotamer_info = list() rotamers_accepted = 0 # --- Evaluate Rotamers --- # for rotamer in range(1, match_rotamer_set.num_rotamers() + 1): # Place residue before applying to pose!!!! # Rotamers need to be transformed back onto the backbone of the input pdb!!! trail_rotamer = match_rotamer_set.rotamer(rotamer) trail_rotamer.place(match_pose_clone.residue(position), match_pose_clone.conformation_ptr()) match_pose_clone.replace_residue(position, trail_rotamer, False) pose_trial_rotamer = match_pose_clone.residue(position) # Evaluate RMSD to possible_contact_geometries contact_RMSDs = list() dof_errors = list() sad_atom_in_rotamer = False for contact_mode in possible_contact_geometries: # REFERENCE: contact_info = [current_motif_coord_list, [float(a) for a in dof_tuple], constraint_atoms_dict['residue']['atom_names'], constraint_atoms_dict['ligand']['atom_names']] current_motif_coord_list = contact_mode[0] contact_dofs = contact_mode[1] residue_matchatoms = contact_mode[2] ligand_matchatoms = contact_mode[3] # Skip rotamer if contact is mediated by a backbone atom... if residue_matchatoms[0] in ['C', 'CA', 'N', 'O']: continue # Get contact atom coords using atom names try: rotamer_contact_coords = [ list(match_pose_clone.residue(position).xyz(atom)) for atom in residue_matchatoms ] # If distance is off, don't even bother... 
residue_contactatom = pose_trial_rotamer.xyz( residue_matchatoms[0]) ligand_contactatom = match_pose_clone_ligand.xyz( ligand_matchatoms[0]) atom_displacement = ligand_contactatom - residue_contactatom if atom_displacement.norm() > 4: # print(f'Contact is {atom_displacement.norm()}A, continuing...') continue residue_atomid_list = [ pose_trial_rotamer.xyz(atom) for atom in residue_matchatoms ] ligand_atomid_list = [ match_pose_clone_ligand.xyz(atom) for atom in ligand_matchatoms ] # Res1 - ligand, Res2 - residue # 'angle_A' is the angle Res1:Atom2 - Res1:Atom1 - Res2:Atom1 angle_A = rosetta.numeric.angle_degrees_double( ligand_atomid_list[1], ligand_atomid_list[0], residue_atomid_list[0]) # 'angle_B' is the angle Res1:Atom1 - Res2:Atom1 - Res2:Atom2 angle_B = rosetta.numeric.angle_degrees_double( ligand_atomid_list[0], residue_atomid_list[0], residue_atomid_list[1]) # 'torsion_A' is the dihedral Res1:Atom3 - Res1:Atom2 - Res1:Atom1 - Res2:Atom1 torsion_A = rosetta.numeric.dihedral_degrees_double( ligand_atomid_list[2], ligand_atomid_list[1], ligand_atomid_list[0], residue_atomid_list[0]) # 'torsion_AB' is the dihedral Res1:Atom2 - Res1:Atom1 - Res2:Atom1 - Res2:Atom2 torsion_AB = rosetta.numeric.dihedral_degrees_double( ligand_atomid_list[1], ligand_atomid_list[0], residue_atomid_list[0], residue_atomid_list[1]) # 'torsion_B' is the dihedral Res1:Atom1 - Res2:Atom1 - Res2:Atom2 - Res2:Atom3 torsion_B = rosetta.numeric.dihedral_degrees_double( ligand_atomid_list[0], residue_atomid_list[0], residue_atomid_list[1], residue_atomid_list[2]) rotamer_dofs = [ angle_A, angle_B, torsion_A, torsion_AB, torsion_B ] except Exception as e: print(e, residue_matchatoms, ligand_matchatoms) # print(f'Skipping {contact_mode[0]}: contains sad atom.') sad_atom_in_rotamer = True break # todo: Edge condition at 0/360... 
dof_difference_list = [ abs(ideal - measured) for ideal, measured in zip( contact_dofs[1:], rotamer_dofs) ] # print('contact_dofs:', contact_dofs) # print('rotamer_dofs:', rotamer_dofs) # print('DOF DIFFERENCE LIST:', dof_difference_list) dof_errors.append(max(dof_difference_list)) contact_RMSDs.append( prody.calcRMSD(np.asarray(current_motif_coord_list), np.asarray(rotamer_contact_coords))) if len(dof_errors) == 0: continue if sad_atom_in_rotamer: continue # Continue if current rotamer does not have <{RMSD_limit}A RMSD with any contact mode if contact_method == 'RMSD' and min(contact_RMSDs, default=666) > RMSD_limit: rotamer_info.append((contact_RMSDs, None, None)) continue # Only continue if a contact mode exists where max angle/torsion DOF error < 10 degrees if contact_method == 'matcher' and min(dof_errors) > 15: continue # Apply minimization to rotamer-ligand interaction before deciding to accept if apply_minimization: minimize_motif.apply(match_pose_clone) # Evaluate possible clashes (fa_rep) with motif residues and ligand sfxn(match_pose_clone) edges = match_pose_clone.energies().energy_graph() motif_fa_rep = list() for motif in motif_and_ligand_resnums: current_edge = edges.find_energy_edge(position, motif) if current_edge is not None: current_edge.fill_energy_map() motif_fa_rep.append( current_edge[rosetta.core.scoring.fa_rep]) # Get score for current rotamer against ligand current_edge = edges.find_energy_edge(position, conformer_resnum) rotamer_ligand_reu = current_edge.dot( sfxn_weights) if current_edge is not None else 0 if all([ min(motif_fa_rep, default=666) < 20, rotamer_ligand_reu <= 20 ]): if flag_special_rot: current_rsd_type_ptr = match_pose_clone.residue_type_ptr( position) new_rsd_type_mutable = rosetta.core.chemical.MutableResidueType( current_rsd_type_ptr) new_rsd_type_mutable.add_variant_type( rosetta.core.chemical.SPECIAL_ROT) new_rsd_type = rosetta.core.chemical.ResidueType.make( new_rsd_type_mutable) rosetta.core.pose.replace_pose_residue_copying_existing_coordinates( match_pose_clone, position, new_rsd_type) # Place residue before applying to pose!!!! # Rotamers need to be transformed back onto the backbone of the input pdb!!! 
new_rotamer = match_pose_clone.residue(position).clone() new_rotamer.place(match_pose.residue(position), match_pose.conformation_ptr()) position_rotamer_list.append( (rotamer_ligand_reu, new_rotamer)) rotamers_accepted += 1 if dump_rotamerset_pdb: current_rotamerset.add_rotamer(new_rotamer) rotamer_info.append( (max(dof_errors), max(motif_fa_rep, default=0), rotamer_ligand_reu)) print( f'{rotamers_accepted} of {match_rotamer_set.num_rotamers()} rotamers accepted' ) rotamer_stats[position][contact_residue][ 'rotamer_info'] = rotamer_info rotamer_stats[position][contact_residue][ 'rotamers_accepted'] = rotamers_accepted if len(position_rotamer_list) > 0: position_rotamer_list_selected = sorted( position_rotamer_list, key=lambda x: x[0])[:rotset_limit] position_rotamer_list = [ rot[1] for rot in position_rotamer_list_selected ] if position not in viable_rotamers.keys(): viable_rotamers[position] = dict() viable_rotamers[position][ contact_residue] = position_rotamer_list if dump_rotamerset_pdb: current_moltresid = all_rotamersets.resid_2_moltenres(position) all_rotamersets.set_explicit_rotamers(current_moltresid, current_rotamerset) if dump_rotamerset_pdb: current_extrachi = len([ rosetta.basic.options.get_boolean_option(f'packing:ex{i}') for i in range(1, 5) if rosetta.basic.options.get_boolean_option(f'packing:ex{i}') is True ]) current_sample_level = rosetta.basic.options.get_integer_option( f'packing:ex{current_extrachi}:level') if current_extrachi <= 2 and current_sample_level <= 3: match_name = os.path.normpath(os.path.basename(match_path)) # todo: figure out why this doesn't work... problem with CONECT records... # all_rotamersets.dump_pdb(match_pose_clone, f"{match_name.split('.')[0]}-extrachi_{current_extrachi}-sampling_{current_sample_level}.pdb") all_rotamers_pose = pyrosetta.pose_from_sequence('A') for position in match_residue_map.keys(): position_rotset = all_rotamersets.rotamer_set_for_residue( position) for rot in range(1, position_rotset.num_rotamers() + 1): all_rotamers_pose.append_residue_by_jump( position_rotset.rotamer(rot), 1) all_rotamers_pose.dump_pdb( f"{match_name.split('.')[0]}-extrachi_{current_extrachi}-sampling_{current_sample_level}.pdb" ) if report_stats: return viable_rotamers, rotamer_stats else: return viable_rotamers
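# The DOF comparison in generate_fuzzball_contact_rotamersets takes abs(ideal - measured)
# directly, and the in-code todo flags the edge condition at 0/360 degrees: torsions of
# -179 and 179 degrees differ by 2 degrees, not 358. Below is a minimal, hypothetical sketch
# of a wrap-aware difference; the helper name and its use are assumptions, not part of the
# original pipeline.
def periodic_dof_difference(ideal, measured, period=360.0):
    """Smallest absolute difference between two angles in degrees on a circle of the given period."""
    diff = abs(ideal - measured) % period
    return min(diff, period - diff)

# Hypothetical drop-in usage for the dof_difference_list computation above:
# dof_difference_list = [periodic_dof_difference(i, m) for i, m in zip(contact_dofs[1:], rotamer_dofs)]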
def rmsd(a, b): """Return the RMSD between two coordinate sets after superposing *a* onto *b*. Both sets must contain the same number of atoms in the same order.""" t = pr.calcTransformation(a, b) return pr.calcRMSD(t.apply(a), b)
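# Usage sketch for rmsd() above. The file names, the 'import prody as pr' alias, and the
# CA-only selection are illustrative assumptions; calcTransformation requires that both
# coordinate arrays have the same number of atoms in matching order.
import prody as pr

s1 = pr.parsePDB('model1.pdb')  # hypothetical input structures
s2 = pr.parsePDB('model2.pdb')
ca1 = s1.select('name CA').getCoords()
ca2 = s2.select('name CA').getCoords()
print('RMSD after superposition: %.2f A' % rmsd(ca1, ca2))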
def find_possible_ifgs_rmsd(self, comb, rmsd_threshold=1.0): """uses iFG definitions in comb object to select iFGs in the parsed protein object that have all atoms and occupancies = 1. """ possible_ifgs = [] if comb.num_res_ifg_query == 1: poss_ifg_sel = self.prody_pdb.select('segment A and chain ' + self.pdb_chain + ' sequence "' + comb.ifg_seq_str_query + '"') if poss_ifg_sel is not None: ifg_resindices, indices = np.unique( poss_ifg_sel.getResindices(), return_index=True) ifg_resnames = poss_ifg_sel.getResnames()[indices] for ifg_resindex, ifg_resname in zip(ifg_resindices, ifg_resnames): ifg_selection = self.prody_pdb.select( 'resindex ' + str(ifg_resindex) + ' and name ' + comb.ifg_sele_dict_query[1][ifg_resname]) if ifg_selection is not None: num_atoms = len(ifg_selection) if num_atoms == len(comb.ifg_sele_dict_query[1] [ifg_resname].split()): if all(ifg_selection.getResnums() > 0): possible_ifgs.append(ifg_selection) comb.total_possible_ifgs += len(possible_ifgs) else: poss_ifg_sel = self.prody_pdb.select('segment A and chain ' + self.pdb_chain + ' sequence "' + comb.ifg_seq_str_query + '"') if poss_ifg_sel is not None: ifg_resindices_cat_list, indices = np.unique( poss_ifg_sel.getResindices(), return_index=True) ifg_resnames_cat_list = poss_ifg_sel.getResnames()[indices] ifg_resindex_pairs = [ ifg_resindices_cat_list[i:i + 2] for i in range(0, len(ifg_resindices_cat_list), 2) ] ifg_resname_pairs = [ ifg_resnames_cat_list[i:i + 2] for i in range(0, len(ifg_resnames_cat_list), 2) ] for ifg_resindex_pair, ifg_resname_pair in zip( ifg_resindex_pairs, ifg_resname_pairs): resind1, resind2 = ifg_resindex_pair resname1, resname2 = ifg_resname_pair try: ifg_selection = self.prody_pdb.select( '(resindex ' + str(resind1) + ' and name ' + comb.ifg_sele_dict_query[1][resname1] + ')' + ' or (resindex ' + str(resind2) + ' and name ' + comb.ifg_sele_dict_query[2][resname2] + ')') except KeyError: print('Non-canonical residue in iFG, skipping.') ifg_selection = None if ifg_selection is not None: num_atoms = len(ifg_selection) names = comb.ifg_sele_dict_query[1][resname1].split() names.extend( comb.ifg_sele_dict_query[2][resname2].split()) if num_atoms == len(names): if all(ifg_selection.getResnums() > 0): possible_ifgs.append(ifg_selection) comb.total_possible_ifgs += len(possible_ifgs) passed_possible_ifgs = [] for pifg in possible_ifgs: com = pr.calcCenter( pifg.select('name ' + ' '.join(comb.query_names[0]))) q2_sel = self.prody_pdb.select( 'name ' + ' '.join(comb.query_names[1]) + ' within ' + str(comb.query_distance) + ' of center', center=com) if q2_sel is not None: resinds_query2s = np.unique(q2_sel.getResindices()) q_sel1_coords = [ pifg.select('name ' + n).getCoords()[0] for n in comb.query_names[0] ] for resind in resinds_query2s: q_sel = self.prody_pdb.select( 'name ' + ' '.join(comb.query_names[1]) + ' and resindex ' + str(resind)) if len(q_sel) == len(comb.query_names[1]): q_sel2_coords = [ q_sel.select('name ' + n).getCoords()[0] for n in comb.query_names[1] ] pifg_coords = np.vstack((q_sel1_coords, q_sel2_coords)) for coords in comb.query_coords: R, m_com, t_com = get_rot_trans( coords, pifg_coords) coords_transformed = np.dot( (coords - m_com), R) + t_com rmsd = pr.calcRMSD(coords_transformed, pifg_coords) if rmsd <= rmsd_threshold: passed_possible_ifgs.append( q_sel ) # This only takes the query2 selection as the iFG. break return passed_possible_ifgs
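# find_possible_ifgs_rmsd relies on a get_rot_trans helper that is not shown in this snippet.
# From its call site, it returns a rotation matrix and the two centroids so that
# (coords - m_com) @ R + t_com superposes the query coordinates onto the candidate iFG
# coordinates. A minimal Kabsch-style sketch of one plausible implementation follows; this is
# an assumption for illustration, not the original helper.
import numpy as np

def get_rot_trans(mobile_coords, target_coords):
    """Return rotation matrix R and the centroids of mobile and target such that
    (mobile_coords - m_com) @ R + t_com least-squares superposes onto target_coords."""
    m_com = mobile_coords.mean(axis=0)
    t_com = target_coords.mean(axis=0)
    mob = mobile_coords - m_com
    tar = target_coords - t_com
    # Orthogonal Procrustes via SVD of the covariance matrix
    u, s, vt = np.linalg.svd(mob.T @ tar)
    # Correct for a possible reflection so R is a proper rotation
    d = np.sign(np.linalg.det(u @ vt))
    r = u @ np.diag([1.0, 1.0, d]) @ vt
    return r, m_com, t_com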