def evol_conserv(msa, **kwargs):
    """Calculate Shannon entropy of an MSA and write it to ``<prefix>.txt``.

    *msa* is the alignment filename; it is read with ``prody.parseMSA``.
    All keyword arguments are forwarded to ``prody.calcShannonEntropy``.
    The following keys are also read here:

    :arg prefix: output filename prefix; when missing, the *msa* path with
        its extension (and a preceding one for ``.gz`` files) removed and
        ``'_conserv'`` appended
    :arg numformat: number format for the text output, default ``'%12g'``
    :arg figent: when true, an entropy figure is saved as well
    :arg figwidth: figure width, default 8
    :arg figheight: figure height, default 6
    :arg figargs: extra positional arguments for ``showShannonEntropy``
    :arg figformat: figure format and file extension, default ``'pdf'``
    :arg figdpi: figure resolution, default 300
    """
    import prody
    from prody import parseMSA, calcShannonEntropy, showShannonEntropy
    from prody import writeArray
    from os.path import splitext

    prefix = kwargs.get('prefix')
    if prefix is None:
        # Drop the extension; gzipped files carry a second one.
        prefix, ext = splitext(msa)
        if ext.lower() == '.gz':
            prefix, ext = splitext(prefix)
        prefix += '_conserv'

    msa = parseMSA(msa)
    entropy = calcShannonEntropy(msa, **kwargs)
    writeArray(prefix + '.txt', entropy,
               format=kwargs.get('numformat', '%12g'))

    if not kwargs.get('figent'):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Use the logger of the locally imported package so this path does
        # not rely on a module-level LOGGER name being defined.
        prody.LOGGER.warn('Matplotlib could not be imported, '
                          'figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    width = kwargs.get('figwidth', 8)
    height = kwargs.get('figheight', 6)
    figargs = kwargs.get('figargs', ())
    figure = plt.figure(figsize=(width, height))
    showShannonEntropy(entropy, msa=msa, *figargs)
    figformat = kwargs.get('figformat', 'pdf')
    figure.savefig(prefix + '.' + figformat, format=figformat,
                   dpi=kwargs.get('figdpi', 300))
def evol_occupancy(msa, **kwargs):
    """Calculate MSA occupancy and write it to ``<prefix>_<axis>.txt``.

    *msa* is the alignment filename; it is read with ``prody.parseMSA``.
    Recognized keyword arguments:

    :arg prefix: output filename prefix; when missing, the *msa* path with
        its extension (and a preceding one for ``.gz`` files) removed and
        ``'_occupancy'`` appended
    :arg numformat: number format for the text output, default ``'%12g'``
    :arg occaxis: value passed as ``occ`` to ``calcMSAOccupancy``, default
        ``'row'``; ``'both'`` calculates ``'row'`` and ``'col'``
    :arg figocc: when true, a figure is saved for each occupancy array
    :arg figwidth: figure width, default 8
    :arg figheight: figure height, default 6
    :arg figformat: figure format and file extension, default ``'pdf'``
    :arg figdpi: figure resolution, default 300
    :arg label, xlabel, title: passed on to ``showMSAOccupancy``
    """
    import prody
    from prody import parseMSA, calcMSAOccupancy, showMSAOccupancy, writeArray
    from os.path import splitext

    prefix = kwargs.get('prefix')
    if prefix is None:
        # Drop the extension; gzipped files carry a second one.
        prefix, ext = splitext(msa)
        if ext.lower() == '.gz':
            prefix, ext = splitext(prefix)
        prefix += '_occupancy'

    msa = parseMSA(msa)
    numformat = kwargs.get('numformat', '%12g')

    occaxis = kwargs.get('occaxis', 'row')
    axes = ['row', 'col'] if occaxis == 'both' else [occaxis]
    # Suffixes are always kept in a list: indexing a plain string such as
    # '_row' would yield only '_' and produce a file named '<prefix>_.txt'.
    suffixes = ['_' + axis for axis in axes]
    occupancy = [calcMSAOccupancy(msa, occ=axis) for axis in axes]

    for suffix, occ in zip(suffixes, occupancy):
        writeArray(prefix + suffix + '.txt', occ, format=numformat)

    if not kwargs.get('figocc'):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Use the logger of the locally imported package so this path does
        # not rely on a module-level LOGGER name being defined.
        prody.LOGGER.warn('Matplotlib could not be imported, '
                          'figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    width = kwargs.get('figwidth', 8)
    height = kwargs.get('figheight', 6)
    xlabel = kwargs.get('xlabel')
    title = kwargs.get('title')
    label = kwargs.get('label')
    figformat = kwargs.get('figformat', 'pdf')
    figdpi = kwargs.get('figdpi', 300)

    for suffix, occ in zip(suffixes, occupancy):
        figure = plt.figure(figsize=(width, height))
        showMSAOccupancy(msa=msa, occ=occ, label=label,
                         xlabel=xlabel, title=title)
        figure.savefig(prefix + suffix + '.' + figformat,
                       format=figformat, dpi=figdpi)
def evol_occupancy(msa, **kwargs):
    """Calculate MSA occupancy and write it to ``<prefix>_<axis>.txt``.

    *msa* is the alignment filename; it is read with ``prody.parseMSA``.
    Recognized keyword arguments:

    :arg prefix: output filename prefix; when missing, the *msa* path with
        its extension (and a preceding one for ``.gz`` files) removed and
        ``'_occupancy'`` appended
    :arg numformat: number format for the text output, default ``'%12g'``
    :arg occaxis: value passed as ``occ`` to ``calcMSAOccupancy``, default
        ``'row'``; ``'both'`` calculates ``'row'`` and ``'col'``
    :arg figocc: when true, a figure is saved for each occupancy array
    :arg figwidth: figure width, default 8
    :arg figheight: figure height, default 6
    :arg figformat: figure format and file extension, default ``'pdf'``
    :arg figdpi: figure resolution, default 300
    :arg label, xlabel, title: passed on to ``showMSAOccupancy``
    """
    import prody
    from prody import parseMSA, calcMSAOccupancy, showMSAOccupancy, writeArray
    from os.path import splitext

    prefix = kwargs.get('prefix')
    if prefix is None:
        # Drop the extension; gzipped files carry a second one.
        prefix, ext = splitext(msa)
        if ext.lower() == '.gz':
            prefix, ext = splitext(prefix)
        prefix += '_occupancy'

    msa = parseMSA(msa)
    numformat = kwargs.get('numformat', '%12g')

    occaxis = kwargs.get('occaxis', 'row')
    axes = ['row', 'col'] if occaxis == 'both' else [occaxis]
    # Suffixes are always kept in a list: indexing a plain string such as
    # '_row' would yield only '_' and produce a file named '<prefix>_.txt'.
    suffixes = ['_' + axis for axis in axes]
    occupancy = [calcMSAOccupancy(msa, occ=axis) for axis in axes]

    for suffix, occ in zip(suffixes, occupancy):
        writeArray(prefix + suffix + '.txt', occ, format=numformat)

    if not kwargs.get('figocc'):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Use the logger of the locally imported package so this path does
        # not rely on a module-level LOGGER name being defined.
        prody.LOGGER.warn('Matplotlib could not be imported, '
                          'figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    width = kwargs.get('figwidth', 8)
    height = kwargs.get('figheight', 6)
    xlabel = kwargs.get('xlabel')
    title = kwargs.get('title')
    label = kwargs.get('label')
    figformat = kwargs.get('figformat', 'pdf')
    figdpi = kwargs.get('figdpi', 300)

    for suffix, occ in zip(suffixes, occupancy):
        figure = plt.figure(figsize=(width, height))
        showMSAOccupancy(msa=msa, occ=occ, label=label,
                         xlabel=xlabel, title=title)
        figure.savefig(prefix + suffix + '.' + figformat,
                       format=figformat, dpi=figdpi)
def prody_pca(opt):
    """Perform PCA calculations based on command line arguments.

    *opt* is the parsed option namespace.  Attributes read here are
    ``outdir``, ``subparser``, ``coords``, ``prefix``, ``nmodes``,
    ``select``, ``psf``, ``pdb``, ``npz``, ``all``, ``delim``, ``ext``,
    ``numformat``, ``eigen``, ``covar``, ``ccorr``, ``sqflucts``, ``proj``,
    ``figures``, ``cc``, ``sf``, ``sp``, ``figformat``, ``width``,
    ``height`` and ``dpi``.

    Input problems (invalid output directory, too few frames/models, empty
    selection) are reported through ``opt.subparser.error``.
    """
    outdir = opt.outdir
    if not os.path.isdir(outdir):
        opt.subparser.error('{0:s} is not a valid path'.format(outdir))

    import prody
    LOGGER = prody.LOGGER

    coords = opt.coords
    prefix = opt.prefix
    nmodes, selstr = opt.nmodes, opt.select

    if os.path.splitext(coords)[1].lower() == '.dcd':
        ag = opt.psf or opt.pdb
        if ag:
            if os.path.splitext(ag)[1].lower() == '.psf':
                ag = prody.parsePSF(ag)
            else:
                ag = prody.parsePDB(ag)
        dcd = prody.DCDFile(coords)
        if len(dcd) < 2:
            # The parser object itself is not callable; errors must go
            # through its ``error`` method like the output path check above.
            opt.subparser.error("DCD file must contain multiple frames.")
        if ag:
            dcd.setAtomGroup(ag)
            select = dcd.select(selstr)
            if select is None:
                opt.subparser.error('Selection "{0:s}" do not match any '
                                    'atoms.'.format(selstr))
            LOGGER.info('{0:d} atoms are selected for calculations.'
                        .format(len(select)))
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())
        if len(dcd) > 1000:
            pca.buildCovariance(dcd)
            # Request the number of modes; the trajectory is not a valid
            # argument here.
            pca.calcModes(nmodes)
            ensemble = dcd
        else:
            # Keep a reference so projections below work for DCD input too.
            ensemble = dcd[:]
            pca.performSVD(ensemble)
    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            opt.subparser.error("PDB file must contain multiple models.")
        if prefix == '_pca':
            prefix = pdb.getTitle() + '_pca'
        select = pdb.select(selstr)
        # Check for an empty selection before taking its length.
        if select is None:
            opt.subparser.error('Selection "{0:s}" do not match any atoms.'
                                .format(selstr))
        LOGGER.info('{0:d} atoms are selected for calculations.'
                    .format(len(select)))
        LOGGER.info('{0:d} atoms will be used for PCA calculations.'
                    .format(len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        ensemble.iterpose()
        pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if opt.npz:
        # NOTE(review): no filename is passed, so the location is decided by
        # saveModel rather than *outdir* -- confirm this is intended.
        prody.saveModel(pca)
    prody.writeNMD(os.path.join(outdir, prefix + '.nmd'), pca[:nmodes],
                   select)

    outall = opt.all
    delim, ext, numformat = opt.delim, opt.ext, opt.numformat

    if outall or opt.eigen:
        prody.writeArray(os.path.join(outdir, prefix + '_evectors'+ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(os.path.join(outdir, prefix + '_evalues'+ext),
                         pca.getEigenvalues(), delimiter=delim,
                         format=numformat)
    if outall or opt.covar:
        prody.writeArray(os.path.join(outdir, prefix + '_covariance'+ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)
    if outall or opt.ccorr:
        prody.writeArray(os.path.join(outdir,
                                      prefix + '_cross-correlations' + ext),
                         prody.calcCrossCorr(pca), delimiter=delim,
                         format=numformat)
    if outall or opt.sqflucts:
        prody.writeArray(os.path.join(outdir, prefix + '_sqfluct'+ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)
    if outall or opt.proj:
        prody.writeArray(os.path.join(outdir, prefix + '_proj'+ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall, cc, sf, sp = opt.figures, opt.cc, opt.sf, opt.sp
    if not (figall or cc or sf or sp):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    LOGGER.info('Saving graphical output.')
    figformat, width, height, dpi = \
        opt.figformat, opt.width, opt.height, opt.dpi
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(pca)
        plt.savefig(os.path.join(outdir, prefix + '_cc.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(pca)
        plt.savefig(os.path.join(outdir, prefix + '_sf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or sp:
        # Each whitespace separated token is "a-b" (inclusive range),
        # "a,b,..." (list) or a single 1-based mode number.  *sp* may be
        # empty when only ``opt.figures`` is set.
        indices = []
        for token in (sp or '').split():
            try:
                if '-' in token:
                    bounds = token.split('-')
                    if len(bounds) == 2:
                        indices.append(list(range(int(bounds[0])-1,
                                                  int(bounds[1]))))
                elif ',' in token:
                    indices.append([int(i)-1 for i in token.split(',')])
                else:
                    indices.append(int(token)-1)
            except ValueError:
                # Tokens that are not numbers are skipped, as before.
                continue
        for index in indices:
            plt.figure(figsize=(width, height))
            prody.showProjection(ensemble, pca[index])
            if isinstance(index, int):
                index = [index]
            labels = [str(i+1) for i in index]
            plt.savefig(os.path.join(outdir, prefix + '_proj_' +
                                     '_'.join(labels) + '.' + figformat),
                        dpi=dpi, format=figformat)
            plt.close('all')
def prody_anm(opt):
    """Perform ANM calculations based on command line arguments.

    *opt* is the parsed option namespace.  Attributes read here are
    ``outdir``, ``subparser``, ``pdb``, ``prefix``, ``cutoff``, ``gamma``,
    ``nmodes``, ``select``, ``model``, ``npz``, ``all``, ``delim``, ``ext``,
    ``numformat``, ``eigen``, ``beta``, ``covar``, ``ccorr``, ``hessian``,
    ``kirchhoff``, ``sqflucts``, ``figures``, ``cc``, ``sf``, ``bf``,
    ``cm``, ``figformat``, ``width``, ``height`` and ``dpi``.

    Input problems (invalid output directory, empty selection) are reported
    through ``opt.subparser.error``.
    """
    outdir = opt.outdir
    if not os.path.isdir(outdir):
        opt.subparser.error('{0:s} is not a valid path'.format(outdir))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    pdb = opt.pdb
    prefix = opt.prefix
    cutoff, gamma = opt.cutoff, opt.gamma
    nmodes, selstr, model = opt.nmodes, opt.select, opt.model

    pdb = prody.parsePDB(pdb, model=model)
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        # The parser object itself is not callable; errors must go through
        # its ``error`` method like the output path check above.
        opt.subparser.error('Selection "{0:s}" do not match any atoms.'
                            .format(selstr))
    LOGGER.info('{0:d} atoms will be used for ANM calculations.'
                .format(len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)

    LOGGER.info('Writing numerical output.')
    if opt.npz:
        # NOTE(review): no filename is passed, so the location is decided by
        # saveModel rather than *outdir* -- confirm this is intended.
        prody.saveModel(anm)
    prody.writeNMD(os.path.join(outdir, prefix + '.nmd'), anm, select)

    outall = opt.all
    delim, ext, numformat = opt.delim, opt.ext, opt.numformat

    if outall or opt.eigen:
        prody.writeArray(os.path.join(outdir, prefix + '_evectors'+ext),
                         anm.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(os.path.join(outdir, prefix + '_evalues'+ext),
                         anm.getEigenvalues(), delimiter=delim,
                         format=numformat)
    if outall or opt.beta:
        fout = prody.openFile(prefix + '_beta.txt', 'w', folder=outdir)
        try:
            fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'
                       .format(['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(anm, select)):
                fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} '
                           '{0[4]:5.2f}\n'.format(data))
        finally:
            # Release the handle even when a row fails to format.
            fout.close()
    if outall or opt.covar:
        prody.writeArray(os.path.join(outdir, prefix + '_covariance'+ext),
                         anm.getCovariance(), delimiter=delim,
                         format=numformat)
    if outall or opt.ccorr:
        prody.writeArray(os.path.join(outdir,
                                      prefix + '_cross-correlations' + ext),
                         prody.calcCrossCorr(anm), delimiter=delim,
                         format=numformat)
    if outall or opt.hessian:
        prody.writeArray(os.path.join(outdir, prefix + '_hessian'+ext),
                         anm.getHessian(), delimiter=delim, format=numformat)
    if outall or opt.kirchhoff:
        prody.writeArray(os.path.join(outdir, prefix + '_kirchhoff'+ext),
                         anm.getKirchhoff(), delimiter=delim,
                         format=numformat)
    if outall or opt.sqflucts:
        prody.writeArray(os.path.join(outdir, prefix + '_sqflucts'+ext),
                         prody.calcSqFlucts(anm), delimiter=delim,
                         format=numformat)

    figall, cc, sf, bf, cm = opt.figures, opt.cc, opt.sf, opt.bf, opt.cm
    if not (figall or cc or sf or bf or cm):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    LOGGER.info('Saving graphical output.')
    figformat, width, height, dpi = \
        opt.figformat, opt.width, opt.height, opt.dpi
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(anm)
        plt.savefig(os.path.join(outdir, prefix + '_cc.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or cm:
        plt.figure(figsize=(width, height))
        prody.showContactMap(anm)
        plt.savefig(os.path.join(outdir, prefix + '_cm.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(anm)
        plt.savefig(os.path.join(outdir, prefix + '_sf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or bf:
        plt.figure(figsize=(width, height))
        bexp = select.getBetas()
        bcal = prody.calcTempFactors(anm, select)
        plt.plot(bexp, label='Experimental')
        # The legend reports the correlation between the two curves.
        plt.plot(bcal, label=('Theoretical (R={0:.2f})'
                              .format(np.corrcoef(bcal, bexp)[0,1])))
        plt.legend(prop={'size': 10})
        plt.xlabel('Node index')
        plt.ylabel('Experimental B-factors')
        plt.title(pdb.getTitle() + ' B-factors')
        plt.savefig(os.path.join(outdir, prefix + '_bf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Keys missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  Keys read here are ``outdir``, ``prefix``,
    ``nmodes``, ``select``, ``psf``, ``pdb``, ``aligned``, ``extend``,
    ``outnpz``, ``outall``, ``numdelim``, ``numext``, ``numformat``,
    ``outeig``, ``outcov``, ``outcc``, ``outhm``, ``outsf``, ``outproj``,
    ``figall``, ``figcc``, ``figsf``, ``figproj``, ``figformat``,
    ``figwidth``, ``figheight`` and ``figdpi``.

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ValueError: when the input has fewer than two frames/models,
        the selection matches no atoms, or the number of selected atoms
        does not match the DCD file
    """
    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')

    # Look past a trailing '.gz' to find the real file type.
    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                pdb = prody.parsePDB(pdb)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            # Fail with a clear message instead of an attribute/type error
            # on ``None`` further down.
            if select is None:
                raise ValueError('selection {0} do not match any atoms'
                                 .format(repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'
                            .format(len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(select.numAtoms(),
                                                         dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())

        pca = prody.PCA(dcd.getTitle())
        if len(dcd) > 1000:
            pca.buildCovariance(dcd, aligned=kwargs.get('aligned'))
            pca.calcModes(nmodes)
            ensemble = dcd
        else:
            ensemble = dcd[:]
            if not kwargs.get('aligned'):
                ensemble.iterpose()
            pca.performSVD(ensemble)
    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Check for an empty selection before taking its length.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'
                             .format(repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'
                    .format(len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'
                    .format(len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not kwargs.get('aligned'):
            ensemble.iterpose()
        pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(join(outdir, prefix + '_extended_' +
                                extend + '.nmd'), *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors'+ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues'+ext),
                         pca.getEigvals(), delimiter=delim, format=numformat)
    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance'+ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir,
                                  prefix + '_cross-correlations' + ext),
                             cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir,
                                    prefix + '_cross-correlations.hm'),
                               cross_corr, xlabel='Residue',
                               ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)
    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct'+ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj'+ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if not (figall or cc or sf or sp):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    LOGGER.info('Saving graphical output.')
    figformat = kwargs.get('figformat')
    width = kwargs.get('figwidth')
    height = kwargs.get('figheight')
    dpi = kwargs.get('figdpi')
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(pca)
        plt.savefig(join(outdir, prefix + '_cc.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(pca)
        plt.savefig(join(outdir, prefix + '_sf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')
    if figall or sp:
        # Each whitespace separated token is "a-b" (inclusive range),
        # "a,b,..." (list) or a single 1-based mode number.  ``figproj``
        # may be empty when only ``figall`` is set.
        indices = []
        for token in (sp or '').split():
            try:
                if '-' in token:
                    bounds = token.split('-')
                    if len(bounds) == 2:
                        indices.append(list(range(int(bounds[0])-1,
                                                  int(bounds[1]))))
                elif ',' in token:
                    indices.append([int(i)-1 for i in token.split(',')])
                else:
                    indices.append(int(token)-1)
            except ValueError:
                # Tokens that are not numbers are skipped, as before.
                continue
        for index in indices:
            plt.figure(figsize=(width, height))
            prody.showProjection(ensemble, pca[index])
            if isinstance(index, int):
                index = [index]
            labels = [str(i+1) for i in index]
            plt.savefig(join(outdir, prefix + '_proj_' +
                             '_'.join(labels) + '.' + figformat),
                        dpi=dpi, format=figformat)
            plt.close('all')
def corepagecalculation(pdbfilename, selatom, noma1, nummodes, gamcut, cut1, gam2, cut2, showresults, smodes, snmd, smodel, scollec, massnomass, sample1, modeens, confens, rmsdens, traverse1, modetra, steptra, rmsdtra, modelnumber, caanm, cagnm, nohanm, nohgnm, allanm, allgnm, bbanm, bbgnm, scanm, scgnm, nmdfolder, modesfolder, collectivityfolder, modelnewname, nmdnewname, modesnewname, modesendname, collectivitynewname, collectivityendname, samplenewname, traversenewname, crosscorr=0, corrfolder='', corrname='', corrend='', compmode01='7', compmode02='15', sqflucts=0, sqfluctsfolder='', sqfluctsname='', sqfluctsend='', separatevar1='0', temfac=0, temfacfolder='', temfacname='', temfacend='', fracovar=0, fraconame='', fracoend='', ovlap=0, ovlapfold='', ovlapname='', ovlapend='', ovlaptab=0, ovlaptabname='', ovlaptabend='', comppdbfilename=''):
    """Run a GNM or ANM calculation on a PDB file and save selected outputs.

    A Tkinter window titled 'Info' shows progress while the function runs
    and is destroyed before returning.  The function reads
    ``~/.noma/savefile.txt`` into the global ``savedfile``, parses
    *pdbfilename* (model *modelnumber*), selects atoms according to
    *selatom*, builds the Kirchhoff (``noma1 == "Gaussian Normal Mode"``) or
    Hessian (``noma1 == "Anisotropic Normal Mode"``) matrix and calculates
    *nummodes* modes.  Output goes under the directory of *pdbfilename*
    plus the folder argument matching the atom/analysis combination
    (``cagnm``, ``caanm``, ``nohgnm``, ...).

    Flags compared against ``1`` switch individual outputs on: *smodel*,
    *snmd*, *smodes*, *scollec*, *sample1*, *traverse1*, *crosscorr*,
    *sqflucts*, *temfac*, *fracovar*, *ovlap* and *ovlaptab*.  Several other
    arguments are compared against the strings ``'0'``/``'1'`` (*gamcut*,
    *showresults*, *massnomass*, *separatevar1*).

    Returns a 3-tuple ``(timeittook, modelfile, mode)`` where *timeittook*
    is a human readable duration string, *modelfile* is the saved model
    filename or ``'nofile'``, and *mode* is ``str(int(prut))`` when
    collectivity was saved or ``'nocoll'`` otherwise.

    NOTE(review): ``gammaDistanceDependent`` is not defined in this function
    and is presumably provided at module level -- confirm.
    """
    import prody
    import time
    import os
    import Tkinter

    # Progress window: one label per row is added as the work proceeds.
    root=Tkinter.Tk()
    root.title('Info')
    onlypage=Tkinter.Frame(root)
    onlypage.pack(side='top')
    Tkinter.Label(onlypage,text='File: '+pdbfilename).grid(row=0,column=0,sticky='w')
    Tkinter.Label(onlypage,text='Atoms: '+selatom).grid(row=1,column=0,sticky='w')
    Tkinter.Label(onlypage,text='Analysis: '+noma1).grid(row=2,column=0,sticky='w')

    # Load the saved settings file; the last character of every line is
    # dropped (presumably the trailing newline).
    path=os.path.join(os.path.expanduser('~'),'.noma/')
    fin = open(path+'savefile.txt','r')
    global savedfile
    savedfile=fin.readlines()
    fin.close()
    i=0
    a=len(savedfile)
    while i<a:
        savedfile[i]=savedfile[i][:-1]
        i+=1
    if gamcut=='0':
        Tkinter.Label(onlypage,text='Gamma: r^'+savedfile[91]).grid(row=3,column=0,sticky='w')
        Tkinter.Label(onlypage,text='Cutoff: '+cut1).grid(row=4,column=0,sticky='w')
    elif gamcut=='1':
        Tkinter.Label(onlypage,text='Gamma: '+gam2).grid(row=3,column=0,sticky='w')
        Tkinter.Label(onlypage,text='Cutoff: '+cut2).grid(row=4,column=0,sticky='w')

    # Split pdbfilename into directory part (bgn) and base name (name);
    # this helps in the saving of files.
    # NOTE(review): for find == 0 the slice [-1:0] is empty, so a trailing
    # '/' is never matched by this scan.
    find = 0
    while find < len(pdbfilename):
        if pdbfilename[-(find+1):-find] == '/':
            bgn = len(pdbfilename)-find
            break
        else:
            find +=1
    try:
        float(bgn)
    except (NameError):
        # No '/' was found, so bgn was never assigned.
        bgn = 0
    find = 0
    while bgn+find<len(pdbfilename):
        if pdbfilename[bgn+find:bgn+find+1] == '.':
            end = len(pdbfilename)-(bgn+find)
            break
        else:
            find +=1
    try:
        name = pdbfilename[bgn:-end]
    except (NameError):
        # No '.' was found after the directory part.
        name = pdbfilename[bgn:len(pdbfilename)]
    # name of the file
    bgn = pdbfilename[:bgn]
    # path for file

    mytimeis = time.asctime(time.localtime(time.time()))
    start = time.time()
    try:
        p38 = prody.parsePDB(pdbfilename,model=int(modelnumber))
    except:
        # Any failure shows a dialog and then retries without a model number.
        import tkMessageBox
        tkMessageBox.askokcancel("File Error","""This is not the correct path or name. Try entering /some/path/nameoffile.pdb If you need help finding the path, open a new terminal and enter: find -name 'filename.pdb' use the output as the pdb input If this doesn't work, make sure the file is in PDB format.""")
        p38 = prody.parsePDB(pdbfilename)
    print 'Submitted: '+pdbfilename+' at '+mytimeis
    Tkinter.Label(onlypage,text='Submitted at: '+mytimeis).grid(row=5,column=0,sticky='w')
    root.update()

    # Pick the output folder and the atom selection for the requested
    # atom set / analysis combination.
    if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode":
        folder = cagnm+'/'
        pro = p38.select('protein and name CA')
        # selects only carbon alphas
    elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode":
        folder = caanm+'/'
        pro = p38.select('protein and name CA')
    elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode":
        folder = nohgnm+'/'
        pro = p38.select('protein and not name "[1-9]?H.*"')
        # gets rid of all Hydrogens
    elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode":
        folder = nohanm+'/'
        pro = p38.select('protein and not name "[1-9]?H.*"')
    elif selatom == "All" and noma1 == "Gaussian Normal Mode":
        folder = allgnm+'/'
        pro = p38.select('protein')
    elif selatom == "All" and noma1 == "Anisotropic Normal Mode":
        folder = allanm+'/'
        pro = p38.select('protein')
    elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode":
        folder = bbgnm+'/'
        pro = p38.select('protein and name CA C O N H')
        # selects backbone
    elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode":
        folder = bbanm+'/'
        pro = p38.select('protein and name CA C O N H')
        # selects backbone
    elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode":
        folder = scgnm+'/'
        pro = p38.select('protein and not name CA C O N H')
        # selects sidechain
    elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode":
        folder = scanm+'/'
        pro = p38.select('protein and not name CA C O N H')
        # selects sidechain

    # Creates the folders where the files will be saved, only if they are
    # not there.
    try:
        open(bgn+folder)
    except (IOError):
        try:
            os.makedirs(bgn+folder)
        except (OSError):
            mer = 0

    # Build the model.  brat is 2 for GNM and 7 for ANM; it is used below as
    # the lowest mode number considered when picking collective modes.
    if noma1 == "Gaussian Normal Mode":
        print 'Building the Kirchhoff matrix'
        Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=6,column=0,sticky='w')
        root.update()
        anm = prody.GNM(name)
        if gamcut=='0':
            anm.buildKirchhoff(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)
            anm.setKirchhoff(anm.getKirchhoff())
        elif gamcut=='1':
            anm.buildKirchhoff(pro,cutoff=float(cut2),gamma=float(gam2))
        brat = 2
    elif noma1 == "Anisotropic Normal Mode":
        print 'Building the Hessian matrix'
        Tkinter.Label(onlypage,text='Building Hessian').grid(row=6,column=0,sticky='w')
        root.update()
        anm = prody.ANM(name)
        if gamcut=='0':
            anm.buildHessian(pro,cutoff=float(cut1),gamma=gammaDistanceDependent)
            anm.setHessian(anm.getHessian())
        elif gamcut=='1':
            anm.buildHessian(pro,cutoff=float(cut2),gamma=float(gam2))
        brat = 7
    print 'Calculating modes'
    Tkinter.Label(onlypage,text='Calculating modes').grid(row=7,column=0,sticky='w')
    root.update()
    anm.calcModes(int(nummodes),zeros = True)
    numatom=anm.numAtoms()
    eigval=anm.getEigvals()
    atomname=pro.getNames()

    # --- Save the model -------------------------------------------------
    if smodel==1:
        if brat==2:
            modelfilename=bgn+folder+name+modelnewname+'.gnm.npz'
        elif brat==7:
            modelfilename=bgn+folder+name+modelnewname+'.anm.npz'
        print 'Saving Model'
        Tkinter.Label(onlypage,text='Saving Model').grid(row=8,column=0,sticky='w')
        root.update()
        try:
            prody.saveModel(anm,bgn+folder+name+modelnewname,True)
        except:
            # Retry without the third argument when the first save fails.
            print 'Matrix not saved due to size'
            Tkinter.Label(onlypage,text='Matrix not saved').grid(row=8,column=0,sticky='w')
            root.update()
            prody.saveModel(anm,bgn+folder+name+modelnewname)

    # --- Save an NMD file -----------------------------------------------
    if snmd==1:
        print 'Saving NMD'
        Tkinter.Label(onlypage,text='Saving NMD').grid(row=9,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+nmdfolder+'/')
        except (OSError):
            mer = 0
        prody.writeNMD(bgn+folder+nmdfolder+'/'+name+nmdnewname+'.nmd',anm[:len(eigval)],pro)
        # this can be viewed in VMD

    # --- Save eigenvectors as text --------------------------------------
    if smodes==1:
        print 'Saving Modes'
        Tkinter.Label(onlypage,text='Saving Modes').grid(row=10,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+modesfolder+'/')
        except (OSError):
            mer = 0
        modefile = bgn+folder+modesfolder+'/'+name+modesnewname+'.'+modesendname
        fout = open(modefile,'w')
        mer = 0
        while mer< len(eigval):
            slowest_mode = anm[mer]
            r = slowest_mode.getEigvec()
            p = slowest_mode.getEigval()
            tq = 0
            # tt, ttt, tttt index the three consecutive eigenvector
            # entries written per atom in the ANM case.
            tt = 0
            ttt = 1
            tttt = 2
            fout.write('MODE {0:3d} {1:15e}'.format(mer+1,p))
            fout.write(""" ------------------------------------------------- """)
            if noma1 == "Gaussian Normal Mode":
                while tq < numatom:
                    fout.write("""{0:4s}{1:15e} """.format(atomname[tq],r[tq]))
                    tq +=1
            elif noma1 == "Anisotropic Normal Mode":
                while tt < numatom*3:
                    fout.write("""{0:4s}{1:15e}{2:15e}{3:15e} """.format(atomname[tq],r[tt],r[ttt],r[tttt]))
                    tq+=1
                    tt +=3
                    ttt+=3
                    tttt+=3
            mer +=1
        fout.close()
        if showresults=='1':
            os.system('/usr/bin/gnome-open '+modefile)

    # --- Save collectivity (mass weighted and unweighted) ---------------
    if scollec==1:
        print 'Saving collectivity'
        Tkinter.Label(onlypage,text='Saving collectivity').grid(row=11,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+collectivityfolder+'/')
        except (OSError):
            mer = 0
        mer = 0
        xx = [0]*(numatom)
        # sets the array to zero and other initial conditions
        i = 0
        aa = 0
        no = 0
        var3 = 0
        sss = [0]*(len(eigval))
        while mer< len(eigval):
            slowest_mode = anm[mer]
            r = slowest_mode.getEigvec()
            p = slowest_mode.getEigval()
            a = 0
            tt = 0
            ttt = 1
            tttt = 2
            while a < numatom:
                atom = atomname[a]
                # Guess the element from the first, then the second
                # character of the atom name and look up its mass.
                mass = 0
                while mass < 2:
                    if atom[mass] == "N":
                        # all nitrogen
                        m = 14.0067
                        break
                    elif atom[mass] == 'H':
                        # all hydrogen
                        m = 1.00794
                        break
                    elif atom[mass] == "C" :
                        # all carbon
                        m = 12.0107
                        break
                    elif atom[mass] == "O" :
                        # all oxygen
                        m = 15.9994
                        break
                    elif atom[mass] == 'S':
                        # all sulfur
                        m = 32.065
                        break
                    elif atom[mass] == 'P' :
                        # all phosphorus
                        m = 30.973762
                        break
                    else:
                        if mass == 0:
                            mass +=1
                            try:
                                atom[mass]
                            except (IndexError):
                                # One-character name with unknown element.
                                m = 1
                                if no == 0:
                                    print 'Enter atom '+atom+' in to the system. Its mass was set to 1 in this simulation.'
                                    no +=1
                                break
                        else:
                            m = 1
                            if no == 0:
                                print 'Enter atom '+atom+' in to the system. Its mass was set to 1 in this simulation'
                                no +=1
                            break
                # Three eigenvector entries per atom (ANM) or one (GNM).
                if len(r)/numatom == 3:
                    xx[i] = (r[tt]**2 + r[ttt]**2 + r[tttt]**2)/m
                    i +=1
                    tt +=3
                    ttt+=3
                    tttt+=3
                else:
                    xx[i] = (r[tt]**2)/m
                    i +=1
                    tt +=1
                a +=1
            var3 = 0
            j = 0
            loop = 1
            while loop == 1:
                if sum(xx) == 0:
                    # need this because you can't divide by 0
                    # NOTE(review): this branch leaves mer and i unchanged --
                    # confirm the outer loop can still terminate.
                    loop = 0
                elif j <(numatom):
                    var1 = xx[j]/sum(xx)
                    if var1 == 0:
                        var2 = 0
                    elif var1 != 0:
                        from math import log
                        # this means natural log
                        var2 = var1* log(var1)
                    var3 += var2
                    j +=1
                else:
                    from math import exp
                    k = exp(-var3)/numatom
                    # Stored as (collectivity, 1-based mode number).
                    sss[aa] = k, aa+1
                    aa +=1
                    mer +=1
                    loop = 0
                    i = 0
                    xx = [0]*(numatom)
                    # goes through all this until the big loop is done
        a = 0
        k=[0]*(len(eigval))
        while a < len(eigval):
            k[a]=prody.calcCollectivity(anm[a]),a+1
            a +=1
        collectivefile = bgn+folder+collectivityfolder+'/'+name+collectivitynewname+'.'+collectivityendname
        fout = open(collectivefile,'w')
        # massnomass only decides which of the two tables is written first.
        # Rows are produced by slicing the string form of each tuple.
        if massnomass=='0':
            fout.write('MODE COLLECTIVITY(mass)')
            fout.write(""" --------------------------- """)
            for h in sorted(sss,reverse=True):
                fout.write(str(h)[-3:-1]+' '+str(h)[1:19]+""" """)
            fout.write(""" MODE COLLECTIVITY(without mass)""")
            fout.write(""" --------------------------- """)
            for hh in sorted(k,reverse=True):
                fout.write(str(hh)[-3:-1]+' '+str(hh)[1:19]+""" """)
        elif massnomass=='1':
            fout.write('MODE COLLECTIVITY(without mass)')
            fout.write(""" --------------------------- """)
            for hh in sorted(k,reverse=True):
                fout.write(str(hh)[-3:-1]+' '+str(hh)[1:19]+""" """)
            fout.write(""" MODE COLLECTIVITY(mass)""")
            fout.write(""" --------------------------- """)
            for h in sorted(sss,reverse=True):
                fout.write(str(h)[-3:-1]+' '+str(h)[1:19]+""" """)
        fout.close()
        if showresults=='1':
            os.system('/usr/bin/gnome-open '+collectivefile)

        # Read the file back to pick the most collective mode numbers,
        # starting at the third line and skipping modes below brat.
        # NOTE(review): hi0 is not advanced after a match, so the same line
        # appears to fill prut, secoll, thicoll and foucoll -- confirm intent.
        fin = open(collectivefile,'r')
        lst = fin.readlines()
        hi0 = 2
        looop = 1
        prut=0
        secoll=0
        thicoll=0
        while looop == 1:
            fine = lst[hi0]
            if int(fine[0:2]) >= brat:
                if prut==0:
                    prut=fine[0:2]
                elif secoll==0:
                    secoll=fine[0:2]
                elif thicoll==0:
                    thicoll=fine[0:2]
                else:
                    foucoll=fine[0:2]
                    looop = 0
            else:
                hi0 +=1
        mostcollective= "Mode "+prut+" is the most collective."
        Tkinter.Label(onlypage,text='Mode '+prut+' is the most collective').grid(row=12,column=0,sticky='w')
        root.update()
        print mostcollective
        fin.close()

    # --- Sample conformations along selected modes ----------------------
    if sample1 == 1:
        print 'Saving sample file'
        Tkinter.Label(onlypage,text='Saving sample file').grid(row=13,column=0,sticky='w')
        root.update()
        # Parse modeens, a space/comma separated list of 1-based mode
        # numbers; entries containing '1c'..'4c' expand to the collective
        # modes found above (prut, secoll, thicoll, foucoll).
        a = modeens+' '
        b = [0]*(len(a)+1)
        i = 0
        j = 0
        b1 = 0
        while i < len(a):
            if a[i:i+1] ==' ' or a[i:i+1]==',':
                try:
                    b[b1]=int(a[j:i])-1
                except:
                    if '1c' in a[j:i]:
                        b[b1]=int(prut)-1
                    elif '2c' in a[j:i]:
                        b[b1]=int(prut)-1
                        b1 +=1
                        b[b1]=int(secoll)-1
                    elif '3c' in a[j:i]:
                        b[b1]=int(prut)-1
                        b1 +=1
                        b[b1]=int(secoll)-1
                        b1 +=1
                        b[b1]=int(thicoll)-1
                    elif '4c' in a[j:i]:
                        b[b1]=int(prut)-1
                        b1 +=1
                        b[b1]=int(secoll)-1
                        b1 +=1
                        b[b1]=int(thicoll)-1
                        b1+=1
                        b[b1]=int(foucoll)-1
                j = i+1
                i +=1
                b1 +=1
            else:
                i +=1
        # Drop the unused tail of the pre-sized index list.
        del b[b1:]
        ensemble = prody.sampleModes(anm[b],pro, n_confs=int(confens), rmsd =float(rmsdens))
        p38ens=pro.copy()
        p38ens.delCoordset(0)
        p38ens.addCoordset(ensemble.getCoordsets())
        prody.writePDB(bgn+folder+name+samplenewname+'.pdb',p38ens)

    # --- Traverse a single mode -----------------------------------------
    if traverse1 ==1:
        print 'Saving traverse file'
        Tkinter.Label(onlypage,text='Saving traverse file').grid(row=14,column=0,sticky='w')
        root.update()
        # 'c' selects the most collective mode found above.
        if modetra=='c':
            modefortra=int(prut)-1
        else:
            modefortra=int(modetra)-1
        trajectory=prody.traverseMode(anm[modefortra],pro,n_steps=int(steptra),rmsd=float(rmsdtra))
        # The rounded RMSD values are computed but not used.
        prody.calcRMSD(trajectory).round(2)
        p38traj=pro.copy()
        p38traj.delCoordset(0)
        p38traj.addCoordset(trajectory.getCoordsets())
        prody.writePDB(bgn+folder+name+'_mode'+str(modefortra+1)+traversenewname+'.pdb',p38traj)

    # --- Cross-correlations per mode, compmode01..compmode02 inclusive --
    if crosscorr==1:
        print 'Saving cross correlation'
        Tkinter.Label(onlypage,text='Saving cross-correlation').grid(row=15,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+corrfolder+'/')
        except (OSError):
            mer = 0
        i=int(compmode01)
        while i <= int(compmode02):
            x=i-1
            correlationdataname=bgn+folder+corrfolder+'/'+name+corrname+'_mode'+str(x+1)+'.'+corrend
            prody.writeArray(correlationdataname,prody.calcCrossCorr(anm[x]),'%.18e')
            print correlationdataname
            i+=1

    # --- Square fluctuations per mode -----------------------------------
    # NOTE(review): this loop stops before compmode02 ('<'), unlike the
    # cross-correlation loop above ('<=') -- confirm which is intended.
    if sqflucts==1:
        print 'Saving square fluctuation'
        Tkinter.Label(onlypage,text='Saving square fluctuation').grid(row=16,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+sqfluctsfolder+'/')
        except (OSError):
            mer = 0
        i=int(compmode01)
        while i < int(compmode02):
            yelp = i-1
            sqfluctdataname = bgn+folder+sqfluctsfolder+'/'+name+sqfluctsname+'_mode'+str(yelp+1)+'.'+sqfluctsend
            fout = open(sqfluctdataname,'w')
            if separatevar1=='0':
                # One record per atom: index and square fluctuation.
                a = 0
                while a < numatom:
                    fout.write(str(a))
                    fout.write(""" """)
                    fout.write(str(prody.calcSqFlucts(anm[yelp])[a]))
                    fout.write(""" """)
                    a +=1
            elif separatevar1=='1':
                # Blocks of numatom / numChains records, numbered from the
                # first residue number and separated by '&' lines.
                a=0
                while a <numatom:
                    firstresnum=int(p38.getResnums()[0:1][0])
                    origiresnum=int(p38.getResnums()[0:1][0])
                    while firstresnum<(int(numatom*1.0/p38.numChains())+origiresnum):
                        fout.write(str(firstresnum))
                        fout.write('\t')
                        fout.write(str(prody.calcSqFlucts(anm[yelp])[a]))
                        fout.write('\n')
                        a+=1
                        firstresnum+=1
                    fout.write('&\n')
            fout.close()
            print sqfluctdataname
            i+=1

    # --- Temperature factors --------------------------------------------
    if temfac==1:
        print 'Saving temperature factors'
        Tkinter.Label(onlypage,text='Saving temperature factors').grid(row=17,column=0,sticky='w')
        root.update()
        try:
            os.makedirs(bgn+folder+temfacfolder+'/')
        except (OSError):
            mer = 0
        # Re-read the PDB file and, for each selected atom name in order,
        # record the second whitespace separated field of the matching
        # 'ATOM ' line.
        fin=open(pdbfilename,'r')
        d = [None]*len(atomname)
        e = 0
        for line in fin:
            pair = line.split()
            if 'ATOM ' in line and e < len(atomname):
                if str(pair[2]) == str(atomname[e]):
                    d[e]=str(pair[1])
                    e+=1
                else:
                    e+=0
            else:
                continue
        fin.close()
        sqf = prody.calcSqFlucts(anm)
        # Square fluctuations scaled to unit Euclidean norm.
        x = sqf/((sqf**2).sum()**.5)
        y = prody.calcTempFactors(anm,pro)
        a = 0
        tempfactorsdataname =bgn+folder+temfacfolder+'/'+name+temfacname+'.'+temfacend
        fout=open(tempfactorsdataname,'w')
        fout.write("""Atom Residue TempFactor TempFactor with exp beta """)
        while a < numatom:
            fout.write("""{0:4s} {1:4d} {2:15f} {3:15f} """.format(d[a],a+1,x[a],y[a]))
            a +=1
        fout.close()
        print tempfactorsdataname

    # --- Fraction of variance figure ------------------------------------
    if fracovar==1:
        try:
            import matplotlib.pyplot as plt
            print 'Saving Fraction of Variance'
            Tkinter.Label(onlypage,text='Saving Fraction of Variance').grid(row=18,column=0,sticky='w')
            root.update()
            try:
                os.makedirs(bgn+folder+modesfolder+'/')
            except (OSError):
                mer = 0
            plt.figure(figsize = (5,4))
            prody.showFractVars(anm)
            prody.showCumulFractVars(anm)
            fracvardataname =bgn+folder+modesfolder+'/'+name+fraconame+'.'+fracoend
            plt.savefig(fracvardataname)
            print fracvardataname
            if showresults=='1':
                os.system('/usr/bin/gnome-open '+fracvardataname)
        except:
            # Any failure in this section is reported and then ignored.
            print 'Error: Fraction of Variance'
            Tkinter.Label(onlypage,text='Error: Fraction of Variance').grid(row=18,column=0,sticky='w')
            root.update()
            mer=0

    # --- Overlap with a second structure --------------------------------
    if ovlap==1 or ovlaptab==1:
        try:
            import matplotlib.pyplot as plt
            print 'Saving Overlap'
            Tkinter.Label(onlypage,text='Saving Overlap').grid(row=19,column=0,sticky='w')
            root.update()
            Tkinter.Label(onlypage,text='Comparison: '+comppdbfilename).grid(row=20,column=0,sticky='w')
            # Split comppdbfilename into directory (bgn1) and base name
            # (name1), using the same scan as for pdbfilename above.
            find = 0
            while find < len(comppdbfilename):
                if comppdbfilename[-(find+1):-find] == '/':
                    bgn1 = len(comppdbfilename)-find
                    break
                else:
                    find +=1
            try:
                float(bgn1)
            except (NameError):
                bgn1 = 0
            find = 0
            while bgn1+find<len(comppdbfilename):
                if comppdbfilename[bgn1+find:bgn1+find+1] == '.':
                    end1 = len(comppdbfilename)-(bgn1+find)
                    break
                else:
                    find +=1
            try:
                name1 = comppdbfilename[bgn1:-end1]
            except (NameError):
                name1 = comppdbfilename[bgn1:len(comppdbfilename)]
            bgn1 = comppdbfilename[:bgn1]
            p381 = prody.parsePDB(comppdbfilename,model=int(modelnumber))
            # Same atom selection as for the primary structure.
            if selatom == "C-alpha" and noma1 == "Gaussian Normal Mode":
                pro1 = p381.select('protein and name CA')
            elif selatom == "C-alpha" and noma1 == "Anisotropic Normal Mode":
                pro1 = p381.select('protein and name CA')
            elif selatom == "Heavy" and noma1 == "Gaussian Normal Mode":
                pro1 = p381.select('protein and not name "[1-9]?H.*"')
            elif selatom == "Heavy" and noma1 == "Anisotropic Normal Mode":
                pro1 = p381.select('protein and not name "[1-9]?H.*"')
            elif selatom == "All" and noma1 == "Gaussian Normal Mode":
                pro1 = p381.select('protein')
            elif selatom == "All" and noma1 == "Anisotropic Normal Mode":
                pro1 = p381.select('protein')
            elif selatom == "Backbone" and noma1 == "Gaussian Normal Mode":
                pro1 = p381.select('protein and name CA C O N H')
            elif selatom == "Backbone" and noma1 == "Anisotropic Normal Mode":
                pro1 = p381.select('protein and name CA C O N H')
            elif selatom == "Sidechain" and noma1 == "Gaussian Normal Mode":
                pro1 = p381.select('protein and not name CA C O N H')
            elif selatom == "Sidechain" and noma1 == "Anisotropic Normal Mode":
                pro1 = p381.select('protein and not name CA C O N H')
            if noma1 == "Gaussian Normal Mode":
                print 'Building the Kirchhoff matrix'
                Tkinter.Label(onlypage,text='Building Kirchhoff').grid(row=21,column=0,sticky='w')
                root.update()
                anm1 = prody.GNM(name1)
                if gamcut=='0':
                    anm1.buildKirchhoff(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent)
                    anm1.setKirchhoff(anm1.getKirchhoff())
                elif gamcut=='1':
                    anm1.buildKirchhoff(pro1,cutoff=float(cut2),gamma=float(gam2))
                brat = 2
            elif noma1 == "Anisotropic Normal Mode":
                print 'Building the Hessian matrix'
                Tkinter.Label(onlypage,text='Building Hessian').grid(row=21,column=0,sticky='w')
                root.update()
                anm1 = prody.ANM(name1)
                if gamcut=='0':
                    anm1.buildHessian(pro1,cutoff=float(cut1),gamma=gammaDistanceDependent)
                    anm1.setHessian(anm1.getHessian())
                elif gamcut=='1':
                    anm1.buildHessian(pro1,cutoff=float(cut2),gamma=float(gam2))
                brat = 7
            print 'Calculating modes'
            Tkinter.Label(onlypage,text='Calculating modes').grid(row=22,column=0,sticky='w')
            root.update()
            anm1.calcModes(int(nummodes),zeros = True)
            try:
                os.makedirs(bgn+folder+ovlapfold+'/')
            except (OSError):
                mer = 0
            if ovlap==1:
                # One overlap figure per mode from compmode01 up to, but
                # not including, compmode02.
                i=int(compmode01)
                while i < int(compmode02):
                    a = i-1
                    plt.figure(figsize=(5,4))
                    prody.showCumulOverlap(anm[a],anm1)
                    prody.showOverlap(anm[a],anm1)
                    plt.title('Overlap with Mode '+str(a+1)+' from '+name)
                    plt.xlabel(name1+' mode index')
                    overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlapname+'_mode'+str(a+1)+'.'+ovlapend
                    plt.savefig(overlapname)
                    print overlapname
                    i+=1
            if ovlaptab==1:
                plt.figure(figsize=(5,4))
                prody.showOverlapTable(anm1,anm)
                plt.xlim(int(compmode01)-1,int(compmode02))
                plt.ylim(int(compmode01)-1,int(compmode02))
                plt.title(name1+' vs '+name+' Overlap')
                plt.ylabel(name1)
                plt.xlabel(name)
                overlapname = bgn+folder+ovlapfold+'/'+name+'_'+name1+ovlaptabname+'.'+ovlaptabend
                plt.savefig(overlapname)
                print overlapname
        except:
            # NOTE(review): every error in the overlap section is silently
            # discarded here -- confirm this best-effort behavior is wanted.
            mer=0

    root.destroy()

    # Report the elapsed time in seconds, minutes or hours.
    mynewtimeis = float(time.time()-start)
    if mynewtimeis <= 60.00:
        timeittook= "The calculations took %.2f s."%(mynewtimeis)
    elif mynewtimeis > 60.00 and mynewtimeis <= 3600.00:
        timeittook= "The calculations took %.2f min."%((mynewtimeis/60.00))
    else:
        timeittook= "The calculations took %.2f hrs."%((mynewtimeis/3600.00))
    print timeittook
    if smodel==1 and scollec==1:
        return (timeittook,modelfilename,str(int(prut)))
    elif scollec==1:
        return (timeittook,'nofile',str(int(prut)))
    elif smodel==1:
        return (timeittook,modelfilename,'nocoll')
    else:
        return (timeittook,'nofile','nocoll')
def prody_anm(pdb, **kwargs):
    """Perform ANM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  The structure is parsed with
    :func:`prody.parsePDB`, the atoms matching the ``select`` option are used
    to build the Hessian and calculate ``nmodes`` modes, and the requested
    numerical and graphical outputs are written into ``outdir`` using
    ``prefix`` as the file name stem.

    :arg pdb: structure passed on to :func:`prody.parsePDB`

    :raises IOError: when the ``outdir`` option is not an existing directory

    Returns **None**.  When the selection matches no atoms, a warning is
    logged and nothing is written.
    """

    # Fill in every option the caller did not supply.
    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')

    pdb = prody.parsePDB(pdb, model=model)
    # A bare '_anm' prefix is completed with the structure title.
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        LOGGER.warn('Selection {0} did not match any atoms.'
                    .format(repr(selstr)))
        return
    LOGGER.info('{0} atoms will be used for ANM calculations.'
                .format(len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(anm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), anm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(anm, select, pdb)
        else:
            extended = prody.extendModel(anm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' + extend + '.nmd'),
                       *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         anm.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         anm.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta.txt', 'w', folder=outdir)
        # try/finally so the handle is released even if formatting or the
        # temperature factor calculation raises.
        try:
            fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'
                       .format(['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(anm, select)):
                fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} '
                           '{0[4]:5.2f}\n'.format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         anm.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(anm)
        if outall or kwargs.get('outcc'):
            prody.writeArray(
                join(outdir, prefix + '_cross-correlations' + ext),
                cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, resnum=select.getResnums(),
                xlabel='Residue', ylabel='Residue',
                title=anm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('hessian'):
        prody.writeArray(join(outdir, prefix + '_hessian' + ext),
                         anm.getHessian(), delimiter=delim, format=numformat)

    if outall or kwargs.get('kirchhoff'):
        prody.writeArray(join(outdir, prefix + '_kirchhoff' + ext),
                         anm.getKirchhoff(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqflucts' + ext),
                         prody.calcSqFlucts(anm), delimiter=delim,
                         format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    bf = kwargs.get('figbeta')
    cm = kwargs.get('figcmap')

    if figall or cc or sf or bf or cm:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def _save_figure(tag):
                # Write the current figure as <prefix><tag><format>, then
                # close every open figure.
                plt.savefig(join(outdir, prefix + tag + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(anm)
                _save_figure('_cc.')

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(anm)
                _save_figure('_cm.')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(anm)
                _save_figure('_sf.')

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(anm, select)
                plt.plot(bexp, label='Experimental')
                plt.plot(bcal, label=('Theoretical (R={0:.2f})'
                                      .format(np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                _save_figure('_bf.')
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  A ``.dcd`` file (optionally ``.gz`` compressed) is
    read as a trajectory, with atoms taken from the ``psf`` or ``pdb``
    option when given; any other file is parsed as a multi-model PDB.
    Results are written into ``outdir`` using ``prefix`` as the file name
    stem.

    :arg coords: path of the PDB or DCD file

    :raises IOError: when the ``outdir`` option is not an existing directory
    :raises ValueError: when the input has fewer than two frames/models, the
        selection matches no atoms, or the number of selected atoms differs
        from the number of atoms in the DCD file
    :raises ImportError: when ``nproc`` is set but :mod:`threadpoolctl` is
        not installed
    """

    # Fill in every option the caller did not supply.
    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    def _with_thread_limit(task, *args):
        # Run task(*args), capping BLAS threads at nproc when it is set.
        if not nproc:
            return task(*args)
        try:
            from threadpoolctl import threadpool_limits
        except ImportError:
            raise ImportError(
                'Please install threadpoolctl to control threads')
        with threadpool_limits(limits=nproc, user_api="blas"):
            return task(*args)

    def _no_match_error():
        return ValueError('selection {0} do not match any atoms'.format(
            repr(selstr)))

    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # Was ``altlocs=altlocs``: an undefined name (NameError).
                pdb = prody.parsePDB(pdb, altloc=altloc)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')

        if pdb:
            select = pdb.select(selstr)
            # Fail with a clear message instead of an error on ``None``.
            if select is None:
                raise _no_match_error()
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                # NOTE(review): nesting reconstructed from whitespace-
                # collapsed source; confirm this belongs to the
                # atom-count-mismatch branch only.
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())

        pca = prody.PCA(dcd.getTitle())

        def _solve_trajectory(n_requested):
            # More than 1000 frames: build the covariance from the DCD file;
            # otherwise take ``dcd[:]``, superpose unless already aligned,
            # and use SVD.
            if len(dcd) > 1000:
                pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
                pca.calcModes(n_requested)
                return dcd, n_requested
            frames = dcd[:]
            if not aligned:
                frames.iterpose(quiet=quiet)
            pca.performSVD(frames)
            return frames, pca.numModes()

        ensemble, nmodes = _with_thread_limit(_solve_trajectory, nmodes)

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # The check must precede ``len(select)``; previously the log call ran
        # first, so this ValueError was never reached.
        if select is None:
            raise _no_match_error()
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))

        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        _with_thread_limit(pca.performSVD, ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         pca.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(
                join(outdir, prefix + '_cross-correlations' + ext),
                cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, xlabel='Residue', ylabel='Residue',
                title=pca.getTitle() + ' cross-correlations', **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if figall or cc or sf or sp:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def _save_figure(tag):
                # Write the current figure as <prefix><tag><format>, then
                # close every open figure.
                plt.savefig(join(outdir, prefix + tag + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                _save_figure('_cc.')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                _save_figure('_sf.')

            if figall or sp:
                # Each whitespace-separated item is one projection: 'a-b'
                # (inclusive range), 'a,b,...' (list) or a single 1-based
                # index.  ``sp`` may be unset when only ``figall`` is given.
                indices = []
                for item in (sp or '').split():
                    try:
                        if '-' in item:
                            bounds = item.split('-')
                            if len(bounds) == 2:
                                indices.append(list(range(
                                    int(bounds[0]) - 1, int(bounds[1]))))
                        elif ',' in item:
                            indices.append(
                                [int(i) - 1 for i in item.split(',')])
                        else:
                            indices.append(int(item) - 1)
                    except ValueError:
                        # Skip malformed items, but say so.
                        LOGGER.warn('Projection specification {0} could not '
                                    'be parsed and is skipped.'
                                    .format(repr(item)))

                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    labels = [str(i + 1) for i in index]
                    _save_figure('_proj_' + '_'.join(labels) + '.')
def evol_coevol(msa, **kwargs):
    """Build a mutual information matrix for *msa* and write it to disk.

    The alignment is parsed with :func:`prody.parseMSA` and the matrix is
    built with :func:`prody.buildMutinfoMatrix`.  The raw matrix is always
    written; one more matrix is written for each entry of the
    ``normalization`` and ``correction`` options.  Each matrix goes to
    ``<prefix><suffix>.txt``, plus a ``.hm`` heatmap when ``heatmap`` is set
    and a figure when ``figcoevol`` is set.

    :arg msa: path of the alignment file; when the ``prefix`` option is
        absent, the output prefix is this path without its extension (and
        without a trailing ``.gz``) followed by ``_mutinfo``

    All keyword arguments are also forwarded to
    :func:`prody.buildMutinfoMatrix` and :func:`prody.calcShannonEntropy`.
    """

    from numpy import arange
    import prody
    from prody import parseMSA, buildMutinfoMatrix, showMutinfoMatrix
    from prody import applyMutinfoCorr, calcShannonEntropy
    from prody import writeArray, LOGGER, applyMutinfoNorm, writeHeatmap
    from os.path import splitext

    prefix = kwargs.get('prefix')
    if prefix is None:
        prefix, extension = splitext(msa)
        if extension.lower() == '.gz':
            prefix, extension = splitext(prefix)
        prefix += '_mutinfo'

    msa = parseMSA(msa)
    mutinfo = buildMutinfoMatrix(msa, **kwargs)
    numformat = kwargs.get('numformat', '%12g')
    heatmap = kwargs.get('heatmap', False)
    if heatmap:
        hmargs = {
            'xlabel': 'Residue', 'ylabel': 'Residue',
            'xorigin': 1, 'xstep': 1,
            'residue': arange(msa.numResidues())}

    # (what, which) pairs; the leading (None, None) is the raw matrix.
    todo = [(None, None)]
    norm = kwargs.get('normalization', [])
    corr = kwargs.get('correction', [])
    if norm is not None:
        if 'joint' in norm:
            todo.append(('norm', 'joint'))
        for which in norm:
            # 'joint' is already queued above.  This was misspelled 'join',
            # so joint normalization was computed and written twice.
            if which == 'joint':
                continue
            todo.append(('norm', which))
    if corr is not None:
        for which in corr:
            todo.append(('corr', which))

    entropy = None  # computed lazily, only if a normalization needs it

    for what, which in todo:
        if what is None:
            matrix = mutinfo
            suffix = ''
            tuffix = ' Mutual Information'
        elif which == 'joint':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            matrix = buildMutinfoMatrix(msa, norm=True, **kwargs)
            suffix = '_norm_joint'
            tuffix = ' MI - Normalization: ' + which
        elif what == 'norm':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            if entropy is None:
                entropy = calcShannonEntropy(msa, **kwargs)
            matrix = applyMutinfoNorm(mutinfo, entropy, norm=which)
            suffix = '_norm_' + which
            tuffix = ' MI - Normalization: ' + which
        else:
            LOGGER.info('Applying {0} correction.'.format(repr(which)))
            matrix = applyMutinfoCorr(mutinfo, which)
            suffix = '_corr_' + which
            tuffix = ' MI - Correction: ' + which

        writeArray(prefix + suffix + '.txt', matrix, format=numformat)

        if heatmap:
            writeHeatmap(prefix + suffix + '.hm', matrix,
                         title=msa.getTitle() + tuffix, **hmargs)

        if kwargs.get('figcoevol'):
            try:
                import matplotlib.pyplot as plt
            except ImportError:
                LOGGER.warn('Matplotlib could not be imported, '
                            'figures are not saved.')
            else:
                cmin = kwargs.get('cmin', matrix.min())
                cmax = kwargs.get('cmax', matrix.max())
                prody.SETTINGS['auto_show'] = False
                width = kwargs.get('figwidth', 8)
                height = kwargs.get('figheight', 6)
                xlabel = kwargs.get('xlabel')
                title = kwargs.get('title')
                figure = plt.figure(figsize=(width, height))
                showMutinfoMatrix(matrix, msa=msa, clim=(cmin, cmax),
                                  xlabel=xlabel, title=title)
                figformat = kwargs.get('figformat', 'pdf')
                figure.savefig(prefix + suffix + '.' + figformat,
                               format=figformat,
                               dpi=kwargs.get('figdpi', 300))
                # One figure per matrix; close it so they do not accumulate.
                plt.close(figure)
def prody_anm(pdb, **kwargs):
    """Perform ANM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  The structure is parsed with
    :func:`prody.parsePDB`, the atoms matching the ``select`` option are used
    to build the Hessian and calculate ``nmodes`` modes, and the requested
    numerical and graphical outputs are written into ``outdir`` using
    ``prefix`` as the file name stem.

    :arg pdb: structure passed on to :func:`prody.parsePDB`

    :raises IOError: when the ``outdir`` option is not an existing directory

    Returns **None**.  When the selection matches no atoms, a warning is
    logged and nothing is written.
    """

    # Fill in every option the caller did not supply.
    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')

    pdb = prody.parsePDB(pdb, model=model)
    # A bare '_anm' prefix is completed with the structure title.
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        LOGGER.warn('Selection {0} did not match any atoms.'.format(
            repr(selstr)))
        return
    LOGGER.info('{0} atoms will be used for ANM calculations.'.format(
        len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(anm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), anm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(anm, select, pdb)
        else:
            extended = prody.extendModel(anm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' + extend + '.nmd'),
                       *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def _write_array(tag, array):
        # Write *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + ext), array,
                         delimiter=delim, format=numformat)

    if outall or kwargs.get('outeig'):
        _write_array('_evectors', anm.getArray())
        _write_array('_evalues', anm.getEigvals())

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta.txt', 'w', folder=outdir)
        # try/finally so the handle is released even if formatting or the
        # temperature factor calculation raises.
        try:
            fout.write(
                '{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'.format(
                    ['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(anm, select)):
                fout.write(
                    '{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n'
                    .format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        _write_array('_covariance', anm.getCovariance())

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(anm)
        if outall or kwargs.get('outcc'):
            _write_array('_cross-correlations', cross_corr)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, resnum=select.getResnums(),
                xlabel='Residue', ylabel='Residue',
                title=anm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('hessian'):
        _write_array('_hessian', anm.getHessian())

    if outall or kwargs.get('kirchhoff'):
        _write_array('_kirchhoff', anm.getKirchhoff())

    if outall or kwargs.get('outsf'):
        _write_array('_sqflucts', prody.calcSqFlucts(anm))

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    bf = kwargs.get('figbeta')
    cm = kwargs.get('figcmap')

    if figall or cc or sf or bf or cm:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def _save_figure(tag):
                # Write the current figure as <prefix><tag><format>, then
                # close every open figure.
                plt.savefig(join(outdir, prefix + tag + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(anm)
                _save_figure('_cc.')

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(anm)
                _save_figure('_cm.')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(anm)
                _save_figure('_sf.')

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(anm, select)
                plt.plot(bexp, label='Experimental')
                plt.plot(bcal, label=('Theoretical (R={0:.2f})'.format(
                    np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                _save_figure('_bf.')
def prody_gnm(pdb, **kwargs):
    """Perform GNM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  The structure is parsed with
    :func:`prody.parsePDB`, the atoms matching the ``select`` option are used
    to build the Kirchhoff matrix and calculate ``nmodes`` modes, and the
    requested numerical and graphical outputs are written into ``outdir``
    using ``prefix`` as the file name stem.

    :arg pdb: structure passed on to :func:`prody.parsePDB`

    :raises IOError: when the ``outdir`` option is not an existing directory
    :raises ValueError: when the selection matches no atoms
    """

    # Fill in every option the caller did not supply.
    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join

    outdir = kwargs.get("outdir")
    if not isdir(outdir):
        raise IOError("{0} is not a valid path".format(repr(outdir)))

    import numpy as np
    import prody

    LOGGER = prody.LOGGER

    selstr = kwargs.get("select")
    prefix = kwargs.get("prefix")
    cutoff = kwargs.get("cutoff")
    gamma = kwargs.get("gamma")
    nmodes = kwargs.get("nmodes")
    model = kwargs.get("model")

    pdb = prody.parsePDB(pdb, model=model)
    # A bare '_gnm' prefix is completed with the structure title.
    if prefix == "_gnm":
        prefix = pdb.getTitle() + "_gnm"

    select = pdb.select(selstr)
    if select is None:
        raise ValueError(
            "selection {0} do not match any atoms".format(repr(selstr)))
    LOGGER.info(
        "{0} atoms will be used for GNM calculations.".format(len(select)))

    gnm = prody.GNM(pdb.getTitle())
    gnm.buildKirchhoff(select, cutoff, gamma)
    gnm.calcModes(nmodes)

    LOGGER.info("Writing numerical output.")
    if kwargs.get("outnpz"):
        prody.saveModel(gnm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + ".nmd"), gnm, select)

    extend = kwargs.get("extend")
    if extend:
        if extend == "all":
            extended = prody.extendModel(gnm, select, pdb)
        else:
            extended = prody.extendModel(gnm, select, select | pdb.bb)
        prody.writeNMD(
            join(outdir, prefix + "_extended_" + extend + ".nmd"), *extended)

    outall = kwargs.get("outall")
    delim = kwargs.get("numdelim")
    ext = kwargs.get("numext")
    numformat = kwargs.get("numformat")

    if outall or kwargs.get("outeig"):
        prody.writeArray(join(outdir, prefix + "_evectors" + ext),
                         gnm.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + "_evalues" + ext),
                         gnm.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get("outbeta"):
        from prody.utilities import openFile

        fout = openFile(prefix + "_beta.txt", "w", folder=outdir)
        # try/finally so the handle is released even if formatting or the
        # temperature factor calculation raises.
        try:
            fout.write(
                "{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n".format(
                    ["C", "RES", "####", "Exp.", "The."]))
            for data in zip(
                select.getChids(),
                select.getResnames(),
                select.getResnums(),
                select.getBetas(),
                prody.calcTempFactors(gnm, select),
            ):
                fout.write(
                    "{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n"
                    .format(data))
        finally:
            fout.close()

    if outall or kwargs.get("outcov"):
        prody.writeArray(join(outdir, prefix + "_covariance" + ext),
                         gnm.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get("outcc") or kwargs.get("outhm"):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(gnm)
        if outall or kwargs.get("outcc"):
            prody.writeArray(
                join(outdir, prefix + "_cross-correlations" + ext),
                cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get("outhm"):
            prody.writeHeatmap(
                join(outdir, prefix + "_cross-correlations.hm"),
                cross_corr,
                resnum=select.getResnums(),
                xlabel="Residue",
                ylabel="Residue",
                title=gnm.getTitle() + " cross-correlations",
            )

    if outall or kwargs.get("kirchhoff"):
        prody.writeArray(join(outdir, prefix + "_kirchhoff" + ext),
                         gnm.getKirchhoff(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get("outsf"):
        prody.writeArray(join(outdir, prefix + "_sqfluct" + ext),
                         prody.calcSqFlucts(gnm), delimiter=delim,
                         format=numformat)

    figall = kwargs.get("figall")
    cc = kwargs.get("figcc")
    sf = kwargs.get("figsf")
    bf = kwargs.get("figbeta")
    cm = kwargs.get("figcmap")
    modes = kwargs.get("figmode")

    if figall or cc or sf or bf or cm or modes:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning("Matplotlib could not be imported. "
                           "Figures are not saved.")
        else:
            prody.SETTINGS["auto_show"] = False
            LOGGER.info("Saving graphical output.")
            figformat = kwargs.get("figformat").lower()
            width = kwargs.get("figwidth")
            height = kwargs.get("figheight")
            dpi = kwargs.get("figdpi")

            def _save_figure(tag):
                # Write the current figure as <prefix><tag><format>, then
                # close every open figure.
                plt.savefig(join(outdir, prefix + tag + figformat),
                            dpi=dpi, format=figformat)
                plt.close("all")

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(gnm)
                _save_figure("_cc.")

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(gnm)
                _save_figure("_cm.")

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(gnm)
                _save_figure("_sf.")

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(gnm, select)
                plt.plot(bexp, label="Experimental")
                plt.plot(bcal, label=(
                    "Theoretical (corr coef = {0:.2f})".format(
                        np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={"size": 10})
                plt.xlabel("Node index")
                plt.ylabel("Experimental B-factors")
                plt.title(pdb.getTitle() + " B-factors")
                _save_figure("_bf.")

            if modes:
                # Items are separated by whitespace and/or commas; each is a
                # 1-based index or an inclusive 'a-b' range.
                indices = []
                for item in modes.replace(",", " ").split():
                    bounds = item.split("-")
                    try:
                        if len(bounds) == 1:
                            indices.append(int(bounds[0]) - 1)
                        elif len(bounds) == 2:
                            indices.extend(
                                range(int(bounds[0]) - 1, int(bounds[1])))
                    except ValueError:
                        # Skip malformed items, but say so (a bare ``except``
                        # used to hide these and KeyboardInterrupt alike).
                        LOGGER.warn("Mode specification {0} could not be "
                                    "parsed and is skipped."
                                    .format(repr(item)))

                for index in indices:
                    try:
                        mode = gnm[index]
                    except Exception:
                        # Best effort: unavailable modes are skipped.
                        LOGGER.warn("Mode {0} is not available and is "
                                    "skipped.".format(index + 1))
                        continue
                    plt.figure(figsize=(width, height))
                    prody.showMode(mode)
                    plt.grid()
                    _save_figure("_mode_" + str(mode.getIndex() + 1) + ".")
def prody_gnm(pdb, **kwargs):
    """Perform GNM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  The structure is parsed with
    :func:`prody.parsePDB`, the atoms matching the ``select`` option are used
    to build the Kirchhoff matrix and calculate ``nmodes`` modes, and the
    requested numerical and graphical outputs are written into ``outdir``
    using ``prefix`` as the file name stem.  When the ``nproc`` option is
    set, the matrix build and mode calculation run inside a
    ``threadpool_limits(limits=nproc, user_api="blas")`` context.

    :arg pdb: structure passed on to :func:`prody.parsePDB`

    :raises IOError: when the ``outdir`` option is not an existing directory
    :raises ValueError: when the selection matches no atoms
    :raises ImportError: when ``nproc`` is set but :mod:`threadpoolctl` is
        not installed
    """

    # Fill in every option the caller did not supply.
    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')
    altloc = kwargs.get('altloc')
    zeros = kwargs.get('zeros')

    pdb = prody.parsePDB(pdb, model=model, altloc=altloc)
    # A bare '_gnm' prefix is completed with the structure title.
    if prefix == '_gnm':
        prefix = pdb.getTitle() + '_gnm'

    select = pdb.select(selstr)
    if select is None:
        raise ValueError('selection {0} do not match any atoms'.format(
            repr(selstr)))
    LOGGER.info('{0} atoms will be used for GNM calculations.'.format(
        len(select)))

    gnm = prody.GNM(pdb.getTitle())

    def _build_and_solve():
        # Shared by the thread-limited and unrestricted paths below.
        gnm.buildKirchhoff(select, cutoff, gamma)
        gnm.calcModes(nmodes, zeros=zeros)

    nproc = kwargs.get('nproc')
    if nproc:
        try:
            from threadpoolctl import threadpool_limits
        except ImportError:
            raise ImportError(
                'Please install threadpoolctl to control threads')
        with threadpool_limits(limits=nproc, user_api="blas"):
            _build_and_solve()
    else:
        _build_and_solve()

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(gnm, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, gnm)

    prody.writeNMD(join(outdir, prefix + '.nmd'), gnm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(gnm, select, pdb)
        else:
            extended = prody.extendModel(gnm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' + extend + '.nmd'),
                       *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def _write_array(tag, array):
        # Write *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + ext), array,
                         delimiter=delim, format=numformat)

    if outall or kwargs.get('outeig'):
        _write_array('_evectors', gnm.getArray())
        _write_array('_evalues', gnm.getEigvals())

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta' + ext, 'w', folder=outdir)
        # try/finally so the handle is released even if formatting or the
        # temperature factor calculation raises.
        try:
            fout.write(
                '{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'.format(
                    ['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(gnm, select)):
                fout.write(
                    '{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n'
                    .format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        _write_array('_covariance', gnm.getCovariance())

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        cross_corr = prody.calcCrossCorr(gnm)
        if outall or kwargs.get('outcc'):
            _write_array('_cross-correlations', cross_corr)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, resnum=select.getResnums(),
                xlabel='Residue', ylabel='Residue',
                title=gnm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('kirchhoff'):
        _write_array('_kirchhoff', gnm.getKirchhoff())

    if outall or kwargs.get('outsf'):
        _write_array('_sqfluct', prody.calcSqFlucts(gnm))

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    bf = kwargs.get('figbeta')
    cm = kwargs.get('figcmap')
    modes = kwargs.get('figmode')

    if figall or cc or sf or bf or cm or modes:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def _save_figure(tag):
                # Write the current figure as <prefix><tag><format>, then
                # close every open figure.
                plt.savefig(join(outdir, prefix + tag + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(gnm)
                _save_figure('_cc.')

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(gnm)
                _save_figure('_cm.')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(gnm)
                _save_figure('_sf.')

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(gnm, select)
                plt.plot(bexp, label='Experimental')
                plt.plot(bcal, label=(
                    'Theoretical (corr coef = {0:.2f})'.format(
                        np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                _save_figure('_bf.')

            if modes:
                # Items are separated by whitespace and/or commas; each is a
                # 1-based index or an inclusive 'a-b' range.
                indices = []
                for item in modes.replace(',', ' ').split():
                    bounds = item.split('-')
                    try:
                        if len(bounds) == 1:
                            indices.append(int(bounds[0]) - 1)
                        elif len(bounds) == 2:
                            indices.extend(
                                range(int(bounds[0]) - 1, int(bounds[1])))
                    except ValueError:
                        # Skip malformed items, but say so (a bare ``except``
                        # used to hide these and KeyboardInterrupt alike).
                        LOGGER.warn('Mode specification {0} could not be '
                                    'parsed and is skipped.'
                                    .format(repr(item)))

                for index in indices:
                    try:
                        mode = gnm[index]
                    except Exception:
                        # Best effort: unavailable modes are skipped.
                        LOGGER.warn('Mode {0} is not available and is '
                                    'skipped.'.format(index + 1))
                        continue
                    plt.figure(figsize=(width, height))
                    prody.showMode(mode)
                    plt.grid()
                    _save_figure('_mode_' + str(mode.getIndex() + 1) + '.')
def _append_visual_entry(week, target):
    """Append a ``<week> smina <target>`` line to ``visual.txt`` in the cwd."""
    # Text append mode plus a context manager: a str is written, and the
    # handle is always flushed and closed.
    with open('visual.txt', 'a') as handle:
        handle.write(week + ' smina ' + target + '\n')


def align():
    """Dock and score every unprocessed target under ``wd/challengedata``.

    Reads the module-level ``wd``. For each week directory and each target
    directory inside it that has no ``answers/<week>/<target>/lmcss_docked.sdf``
    yet, the function:

    1. extracts the ``ATOM`` records of the ``LMCSS*.pdb`` receptor into
       ``lmcss_rec.pdb`` (via ``grep``);
    2. runs ``rdkit-scripts/rdconf.py`` on every ``.smi`` file to produce
       ``lig.sdf``;
    3. runs ``smina`` with the ``*lig.pdb`` file as autobox ligand, writing
       ``lmcss_docked.sdf``;
    4. runs the ProDy / ``obrms`` scoring calls, appends a line to
       ``visual.txt`` and copies ``lmcss_docked.sdf`` into the answers tree.

    Side effects: creates directories, runs shell commands through
    ``os.system`` and changes the process working directory (it is left at
    ``wd`` after a target has been scored).

    NOTE(review): the nesting of steps 2-4 under the receptor step was
    reconstructed from the statement order; confirm it matches the intent.
    """
    global wd
    base = wd + '/challengedata'
    answers = base + '/answers'
    if not os.path.isdir(answers):  # first run: create the answers directory
        os.mkdir(answers)
    rddir = base + '/rdkit-scripts'
    if not os.path.isdir(rddir):
        # The clone lands in the current working directory.
        # NOTE(review): that is only ``rddir`` when the caller's cwd is
        # ``wd/challengedata`` -- confirm against the caller.
        os.system('git clone https://github.com/dkoes/rdkit-scripts')

    # Entries of ``challengedata`` that are not weekly data sets.
    skipped_weeks = ("readme.txt", "latest.txt", "answers", "rdkit-scripts",
                     'PDBfiles', 'visual.txt')
    # Entries of a week directory that are not target directories.
    skipped_targets = ('readme.txt',
                       'new_release_structure_sequence_canonical.tsv',
                       'new_release_structure_nonpolymer.tsv',
                       'new_release_crystallization_pH.tsv',
                       'new_release_structure_sequence.tsv')

    for week in os.listdir(base):  # for each week's data
        if week in skipped_weeks:
            continue
        week_dir = base + '/' + week
        week_answers = answers + '/' + week
        if not os.path.isdir(week_answers):
            os.mkdir(week_answers)

        for target in os.listdir(week_dir):
            if target in skipped_targets:
                continue
            target_dir = week_dir + '/' + target
            target_answers = week_answers + '/' + target
            # Built from ``base`` rather than os.getcwd(): this function
            # itself moves the cwd to ``wd`` after each scored target, which
            # made the cwd-relative check look in the wrong place.
            if os.path.isfile(target_answers + '/lmcss_docked.sdf'):
                continue  # already docked on an earlier run

            for name in os.listdir(target_dir):
                if not (name.startswith("LMCSS") and name.endswith(".pdb")):
                    continue
                if name.endswith("lig.pdb"):
                    continue  # the ligand file is handled further down

                # splitext removes exactly the ".pdb" suffix; str.strip('.pdb')
                # would also eat trailing '.', 'p', 'd', 'b' of the stem.
                stem = os.path.splitext(name)[0]

                # Create the receptor-only PDB inside the target directory.
                os.chdir(target_dir)
                os.system("grep ATOM " + name + " > lmcss_rec.pdb")
                os.chdir(base)

                files = os.listdir(target_dir)
                for entry in files:
                    if entry.endswith(".smi"):  # .smi -> lig.sdf
                        os.chdir(target_dir)
                        os.system(base + '/rdkit-scripts/rdconf.py' + " " +
                                  target_dir + '/' + entry +
                                  " lig.sdf --maxconfs 1")
                        os.chdir(base)

                for entry in files:  # run smina
                    if not entry.endswith("lig.pdb"):
                        continue
                    # One chdir into the target directory and one back.
                    # Re-reading os.getcwd() after the first chdir used to
                    # send the second chdir to ".../<week>/<target>/<week>/
                    # <target>" and lose the base path.
                    os.chdir(target_dir)
                    os.system(
                        "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand " +
                        entry + " -o lmcss_docked.sdf")
                    os.chdir(base)

                    if not os.path.isdir(target_answers):
                        os.mkdir(target_answers)

                    os.chdir(target_dir)
                    listing = os.listdir(target_dir)
                    for item in listing:
                        if (item.endswith(".txt") and item != "center.txt"
                                and item != "visual.txt"):
                            with open(item) as handle:
                                lines = handle.readlines()
                            # NOTE(review): strip() removes a *set* of
                            # characters, not the "ligand, " prefix; kept
                            # as-is because the txt format is not visible
                            # here -- confirm and replace with prefix removal.
                            ligand = lines[2].strip('ligand, ')
                            ligand = ligand.replace('\n', '')

                        if item.endswith("lig.pdb"):
                            # NOTE(review): ``ligand`` is only bound if a
                            # qualifying .txt file was listed before this
                            # file; os.listdir order is arbitrary.
                            prody.fetchPDB(target)
                            reference = prody.parsePDB(target)
                            receptor = prody.parsePDB('lmcss_rec.pdb')
                            # matchChains returns a list of
                            # (map1, map2, seqid, overlap) tuples and
                            # superpose returns (mobile, transformation).
                            ref_map, rec_map, _seqid, _overlap = \
                                prody.matchChains(reference, receptor)[0]
                            rec_map, _transformation = prody.superpose(
                                rec_map, ref_map, weights=None)
                            # NOTE(review): the argument looks like a file
                            # name, not a selection string, and the result is
                            # discarded -- confirm what was intended.
                            rec_map.select(ligand + '_ligand.pdb')

                            # NOTE(review): nothing in this function writes
                            # "<stem>_docked.sdf" (smina writes
                            # lmcss_docked.sdf) -- confirm.
                            os.system("obrms -f " + item + ' ' + stem +
                                      "_docked.sdf")  # run obrms
                            _append_visual_entry(week, target)
                            print(listing)

                            protein = prody.fetchPDB(target)
                            # NOTE(review): ``array`` is not defined in this
                            # function (original comment: NEED NUMPY ARRAY),
                            # and superpose is handed file names rather than
                            # atoms; both calls are kept verbatim for the
                            # owner to finish.
                            prody.writeArray('lmcss_docked_array.sdf', array)
                            prody.superpose('lmcss_docked.sdf', protein,
                                            weights=None)
                            os.system("obrms -f " + item +
                                      " lmcss_docked.sdf")  # run obrms

                            # The second visual.txt entry goes to the
                            # challengedata-level file.
                            os.chdir(wd + '/challengedata/')
                            _append_visual_entry(week, target)

                            docked = target_dir + '/lmcss_docked.sdf'
                            shutil.copy(docked, target_answers + '/')
                            print(docked)
                            break
                    os.chdir(wd)