def CapC(Pdb, OutPdbFile=None):
    """Add a C-terminal NME cap to a structure and return the new pdb data.

    Pdb is first passed through ReturnPdbData. The capped structure is
    handed to SavePdbData together with OutPdbFile and is also returned.
    """
    # Built on ProteinClass: the cap residue is created from Seq="<" and
    # appended with the ProteinClass "+" operator.
    Body = protein.ProteinClass(Pdb=ReturnPdbData(Pdb))
    Capped = Body + protein.ProteinClass(Seq="<")
    OutPdb = Capped.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def CapN(Pdb, OutPdbFile=None):
    """Add an N-terminal ACE cap to a structure and return the new pdb data.

    Pdb is first passed through ReturnPdbData. The capped structure is
    handed to SavePdbData together with OutPdbFile and is also returned.
    """
    # Built on ProteinClass: the cap residue is created from Seq=">" and
    # prepended with the ProteinClass "+" operator.
    Body = protein.ProteinClass(Pdb=ReturnPdbData(Pdb))
    Capped = protein.ProteinClass(Seq=">") + Body
    OutPdb = Capped.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Splice(Pdb1, Pdb2, OutPdbFile=None):
    """Join two structures end to end and return the combined pdb data.

    Both inputs are passed through ReturnPdbData, loaded as ProteinClass
    objects and concatenated (Pdb1 first). The result is handed to
    SavePdbData together with OutPdbFile and is also returned.
    """
    Data1 = ReturnPdbData(Pdb1)
    Data2 = ReturnPdbData(Pdb2)
    Joined = protein.ProteinClass(Pdb=Data1) + protein.ProteinClass(Pdb=Data2)
    OutPdb = Joined.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Cap(Pdb, OutPdbFile=None):
    """Add both the ACE (N-terminal) and NME (C-terminal) caps to a structure.

    Pdb is first passed through ReturnPdbData. The capped structure is
    handed to SavePdbData together with OutPdbFile and is also returned.
    """
    # Built on ProteinClass: Seq=">" gives the leading cap residue and
    # Seq="<" the trailing one.
    Body = protein.ProteinClass(Pdb=ReturnPdbData(Pdb))
    Head = protein.ProteinClass(Seq=">")
    Tail = protein.ProteinClass(Seq="<")
    OutPdb = (Head + Body + Tail).GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def SpliceOpt(Pdb1, Pdb2, OutPdbFile=None, N=5):
    """Join two structures and optimize the junction for non-overlap.

    Works like Splice, but when both parts are non-empty the joined
    structure's OptimizePep is called with the length of the first part
    (the junction position) and N. The result is handed to SavePdbData
    together with OutPdbFile and is also returned.
    """
    Data1 = ReturnPdbData(Pdb1)
    Data2 = ReturnPdbData(Pdb2)
    First = protein.ProteinClass(Pdb=Data1)
    Second = protein.ProteinClass(Pdb=Data2)
    Joined = First + Second
    # Nothing to optimize unless there are residues on both sides of the join.
    BothPresent = len(First) > 0 and len(Second) > 0
    if BothPresent:
        Joined.OptimizePep(len(First), N)
    OutPdb = Joined.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Extend(Pdb, SeqBefore, SeqAfter, OutPdbFile=None, Opt=True, N=5):
    """Extend a structure in either or both directions with given sequences.

    Pdb        -- input structure; passed through ReturnPdbData like the
                  other helpers in this module
    SeqBefore  -- sequence to prepend (skipped when empty)
    SeqAfter   -- sequence to append (skipped when empty)
    OutPdbFile -- forwarded to SavePdbData together with the result
    Opt        -- when true (and the input structure is non-empty), call
                  OptimizePep at each new junction
    N          -- second argument forwarded to OptimizePep

    Returns the pdb data of the extended structure.
    """
    # uses proteinclass
    Pdb = ReturnPdbData(Pdb)
    p = protein.ProteinClass(Pdb=Pdb)
    n = len(p)
    if len(SeqBefore) > 0:
        p = protein.ProteinClass(Seq=SeqBefore) + p
        if Opt and n > 0:
            p.OptimizePep(len(SeqBefore), N)
    if len(SeqAfter) > 0:
        # The junction lies after everything currently in p. That equals the
        # original length only when nothing was prepended, so measure it here
        # instead of reusing n (same convention as SpliceOpt: the index is the
        # number of residues in front of the join).
        JoinInd = len(p)
        p = p + protein.ProteinClass(Seq=SeqAfter)
        if Opt and n > 0:
            p.OptimizePep(JoinInd, N)
    OutPdb = p.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Center(Pdb, OutPdbFile=None):
    """Center a structure and return the resulting pdb data.

    Pdb is given directly to ProteinClass, whose Center() method does the
    work. The result is handed to SavePdbData together with OutPdbFile and
    is also returned.
    """
    Structure = protein.ProteinClass(Pdb=Pdb)
    Structure.Center()
    OutPdb = Structure.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def RMSDPdb(PdbFile1, PdbFile2, Center=True, Backbone=True, AlignSeq=False, CompResInd=None, CalcResInd=None):
    """Load two pdb files and return RMSDProteinClass evaluated on them.

    All keyword options are forwarded unchanged to RMSDProteinClass; the
    return value is whatever that function returns.
    """
    Options = dict(
        Center=Center,
        Backbone=Backbone,
        AlignSeq=AlignSeq,
        CompResInd=CompResInd,
        CalcResInd=CalcResInd,
    )
    First = protein.ProteinClass(Pdb=PdbFile1)
    Second = protein.ProteinClass(Pdb=PdbFile2)
    return RMSDProteinClass(First, Second, **Options)
def __init__(self, cfg, Pdb=None, Seq=None, Model=None, Prefix='ncos'):
    """Build the NCOS protein object from a config and a Pdb or a Seq.

    cfg    -- configuration object; several flags/methods are read from it
              and the object itself is kept as self.cfg
    Pdb    -- coarse-grained pdb, used only when Seq is None
    Seq    -- sequence; when given, chains are set up from it instead
    Model  -- forwarded to ProteinClass when reading Pdb
    Prefix -- stored as self.Prefix

    Terminates the process via exit() when the config requests both
    special GLY parameters and a pseudo GLY side chain.
    """
    if Verbose:
        print('Initializing a NCOS protein object...')
    self.Prefix = Prefix

    # GLY-specific backbone switches (the third one is a call on cfg)
    self.hasSpecialBBGLYAngles = cfg.hasSpecialBBGLYAngles
    self.hasSpecialBBGLYTorsions = cfg.hasSpecialBBGLYTorsions
    self.hasSpecialGLYParams = cfg.hasSpecialGLYParams()

    # PRO-specific backbone switches (same layout as for GLY)
    self.hasSpecialBBPROAngles = cfg.hasSpecialBBPROAngles
    self.hasSpecialBBPROTorsions = cfg.hasSpecialBBPROTorsions
    self.hasSpecialPROParams = cfg.hasSpecialPROParams()

    # pseudo side chain for glycine; mutually exclusive with special GLY params
    self.hasPseudoGLY = cfg.hasPseudoGLY()
    if self.hasSpecialGLYParams and self.hasPseudoGLY:
        print('Error: Cannot have pseudo GLY side chain and special GLY BB torsion simultaneously')
        exit()

    # side chain referencing, plus the whole config object just in case
    self.SSRefType = cfg.SSRefType
    self.cfg = cfg

    if Seq is not None:
        # a sequence was provided: start from empty book-keeping and let
        # __SetChains fill it in
        self.Seq = None
        self.Pos = None
        self.ResChainInds = []
        self.Chains = []
        self.NChains = None
        self.__SetChains(Seq)
    else:
        # a CG pdb was provided: wrap it in an internal ProteinClass object
        self.p0 = protein.ProteinClass(Pdb, Model=Model)
        self.Seq = self.p0.Seq
        self.Pos = self.p0.Pos
        self.ResChainInds = [self.p0.ResChain(ResInd) for ResInd in range(len(self.Seq))]
        self.Chains = self.p0.Chains
        self.NChains = len(self.Chains)
    # NOTE(review): printed for both construction paths -- confirm
    print('Sequence: %s' % (' '.join(self.Seq)))

    # sequence book-keeping
    self.ResTypes = list(set(self.Seq))
    self.NRes = len(self.Seq)
    self.NResTypes = len(self.ResTypes)

    # side chain atom types, filled in by __SetSideChains
    self.AtomSbyNum = []
    self.AtomSbyRes = {}
    self.__SetSideChains()

    # start-atom indices for quick backbone referencing
    self.StartAtomInds = []
    self.RelativeStartAtomInds = []
    self.__SetStartAtomInds()
    self.__SetRelativeStartAtomInds()

    # atom names
    self.AtomNames = []
    self.__SetAtomNames()

    # bonds
    self.BondPairs = None
    self.__SetBonds()
def Standardize(Pdb, OutPdbFile=None):
    """Format a structure to standard residue and atom names.

    Line-level fixes applied to the pdb data returned by ReturnPdbData:
      * HID/HIE/HIP -> HIS and CYX/CYM -> CYS on ATOM records
      * amide hydrogen " HN " renamed to " H  "
      * HETATM selenomethionine (MSE) records turned into ATOM/MET records
      * alternate location "A" is blanked; ATOM records with any other
        non-blank alternate location are dropped
    The result is then passed through Renumber, chains are regenerated with
    ProteinClass.GenerateChains, and the final pdb data is handed to
    SavePdbData together with OutPdbFile and returned.
    """
    Pdb = ReturnPdbData(Pdb)
    OutPdb = []
    for l in Pdb.split("\n"):
        if l.startswith("ATOM"):
            #fix histidines
            if l[17:20] in ["HID", "HIE", "HIP"]:
                l = l[:17] + "HIS" + l[20:]
            elif l[17:20] in ["CYX", "CYM"]:
                l = l[:17] + "CYS" + l[20:]
            #fix amide hydrogens; the replacement must fill all four
            #atom-name columns (12-15) or every later column shifts left
            if l[12:16] == " HN ":
                l = l[:12] + " H  " + l[16:]
        elif l.startswith("HETATM") and l[17:20] == 'MSE':
            #replace selenomethionine; the record name occupies six columns,
            #so "ATOM" is padded to the same width as "HETATM"
            l = "ATOM  " + l[6:17] + "MET" + l[20:]
        #check alternate location indicator (column 16)
        if l.startswith("ATOM"):
            if l[16] == "A":
                l = l[:16] + " " + l[17:]
            elif not l[16] == " ":
                continue
        OutPdb.append(l)
    OutPdb = "\n".join(OutPdb)
    OutPdb = Renumber(OutPdb)
    #now generate chains
    p = protein.ProteinClass(Pdb=OutPdb)
    p.GenerateChains()
    OutPdb = p.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def BFactorRMSD(RefPdb, Pdb, OutPdbFile=None):
    """Write per-atom RMSD against a reference into the B-factor column.

    Both structures are passed through ReturnPdbData and loaded as
    ProteinClass objects. rmsd.RMSDProteinClass is called with
    UpdateBFactors=True, after which the pdb data of the (non-reference)
    structure is handed to SavePdbData with OutPdbFile and returned.
    """
    import rmsd
    Pdb = ReturnPdbData(Pdb)
    RefPdb = ReturnPdbData(RefPdb)
    Target = protein.ProteinClass(Pdb=Pdb)
    Reference = protein.ProteinClass(Pdb=RefPdb)
    # The returned numbers are not needed here; the call is made for its
    # effect on Target when UpdateBFactors is set.
    Value, NResUsed = rmsd.RMSDProteinClass(
        Reference, Target,
        Backbone=False, AlignSeq=True, UpdateBFactors=True, AlignAtoms=True)
    OutPdb = Target.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Generate(Seq, OutPdbFile=None):
    """Generate pdb data for the extended sequence Seq.

    The structure is built by ProteinClass from the sequence alone; its pdb
    data is handed to SavePdbData together with OutPdbFile and returned.
    """
    OutPdb = protein.ProteinClass(Seq=Seq).GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Map2Polymer(Pdb, AAPdb, PolyName, Model = None, MappedPrefix = None, hasPseudoGLY = True, DelTmpPdb = True):
    ''' Maps the given (CG) Pdb to a polymer of equivalent length and
    returns the path of a CG pdb that is mapped to a poly-peptide
    (PolyName) of equivalent length.
    Energy minimization not yet implemented.

    Pdb          -- path of the CG pdb whose sequence is kept
    AAPdb        -- path of the all-atom pdb supplying the coordinates
    PolyName     -- residue name used for every position of the polymer
    Model        -- forwarded to ProteinClass when reading AAPdb
    MappedPrefix -- output prefix; when None the output is written to the
                    current directory as <pdbname>_map2<polyname>.pdb
    hasPseudoGLY -- forwarded to Map
    DelTmpPdb    -- remove the intermediate polyAA.pdb / polyCG.pdb files

    CONECT records found in Pdb (first CONECT line through end of file) are
    appended to the output; a Pdb without CONECT records is fine.'''
    # read in unmapped AA Pdb
    p_AA = protein.ProteinClass(AAPdb, Model = Model)
    p_AA = p_AA.Decap()

    # map this to a polymer of equivalent length
    NRes = len(p_AA.Seq)
    print('Mapping structure to a %s-%d sequence' % (PolyName, NRes))
    p_AA = p_AA.MutateSeq([PolyName] * NRes)
    PolyAAPdb = 'polyAA.pdb'
    p_AA.WritePdb(PolyAAPdb)

    # coarse grain this pdb (Map writes <CGPrefix>.pdb, i.e. PolyCGPdb)
    PolyCGPdb = 'polyCG.pdb'
    Map(InPdb = PolyAAPdb, CGPrefix = 'polyCG', hasPseudoGLY = hasPseudoGLY)

    # write coarse grained co-ordinates using given Pdb seq
    p_CG = protein.ProteinClass(Pdb)
    p_PolyCG = protein.ProteinClass(PolyCGPdb)
    p_CG.Pos = p_PolyCG.Pos
    if MappedPrefix is not None:
        MappedPdb = MappedPrefix + '.pdb'
    else:
        PdbName = Pdb.split('/')[-1].split('.pdb')[0]
        MappedPdb = os.path.join(os.getcwd(), PdbName + '_map2%s.pdb' % PolyName.lower())
    p_CG.WritePdb(MappedPdb)

    # copy over CONECT records if present
    with open(Pdb, 'r') as of:
        lines = of.readlines()
    # Index of the first CONECT line, or None. (Indexing an empty list of
    # matches raises IndexError, which the former "except ValueError" did
    # not cover.)
    start = next((i for i, line in enumerate(lines) if line.startswith('CONECT')), None)
    if start is not None:
        with open(MappedPdb, 'a') as of:
            of.write('\n' + ''.join(lines[start:]))

    # del temp files
    if DelTmpPdb:
        for i in [PolyAAPdb, PolyCGPdb]:
            os.remove(i)
    return MappedPdb
def ExtractConect(NativePdb, Pdb):
    """Return the pdb data of Pdb followed by NativePdb's CONECT section.

    Clustered pdbs come with all cluster structs followed by CONECT records
    at the end. This parses the CONECT records from the native pdb (first
    CONECT line through end of file) and places them after the structure
    read from Pdb.

    When NativePdb has no CONECT records the result is just the pdb data
    plus the separating newline.
    """
    p = protein.ProteinClass(Pdb)
    pdbstr = p.GetPdb()
    with open(NativePdb, 'r') as of:
        lines = of.readlines()
    # First CONECT line; defaults to end-of-file so the slice below is empty
    # instead of raising when no CONECT record exists.
    start = next((i for i, line in enumerate(lines) if line.startswith('CONECT')), len(lines))
    conectstr = ''.join(lines[start:])
    return pdbstr + '\n' + conectstr
def GenRandInitPos(self): '''generate random initial position based on /share/apps/scripts/template.pdb to run from a different seed structure just place a cg pdb called init.pdb with that structure''' tmpPdb = os.path.join(os.getcwd(), 'tmp.pdb') if self.InitPdb is None: self.InitPdb = 'init.pdb' if not os.path.isfile(self.InitPdb): print 'Generating fully extended initial AA structure. ', # create an ALL-ATOM protein class object for the given sequence pobj = protein.ProteinClass(Seq=self.p.Seq) pobj.WritePdb(tmpPdb) # coarse grain all-atom proteinclass object mapNCOS.Map(InPdb=tmpPdb, CGPrefix=self.InitPdb.split('.pdb')[0], hasPseudoGLY=self.p.hasPseudoGLY) # remove all-atom pdb if os.path.isfile(tmpPdb): os.remove(tmpPdb) del pobj print 'Using init conf as generated in : %s\n' % self.InitPdb pobj = protein.ProteinClass(self.InitPdb) initpos = pobj.Pos return initpos
def RandDihedrals(Pdb, OutPdbFile=None, DeltaAng=5.):
    """Add a random offset to each backbone phi/psi pair.

    For every residue whose Phi and Psi are both defined, each angle gets
    an independent offset drawn uniformly from [-DeltaAng, DeltaAng) and the
    residue is rotated to the new values. The resulting pdb data is handed
    to SavePdbData together with OutPdbFile and returned.
    """
    p = protein.ProteinClass(Pdb=ReturnPdbData(Pdb))
    for ResInd in range(len(p)):
        Phi, Psi = p.PhiPsi(ResInd)
        # residues lacking either angle are left alone
        if Phi is None or Psi is None:
            continue
        NewPhi = Phi + DeltaAng * (2. * random.random() - 1.)
        NewPsi = Psi + DeltaAng * (2. * random.random() - 1.)
        p.RotateToPhiPsi(ResInd, NewPhi, NewPsi)
    OutPdb = p.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
def Rotate(Pdb, ResNum, Phi=None, Psi=None, OutPdbFile=None):
    """Rotate one residue to the specified phi, psi.

    Raises IndexError when ResNum is not a valid residue index. If the
    rotation itself fails with a StandardError, a message is printed and
    the unmodified pdb data is returned without saving. Otherwise the new
    pdb data is handed to SavePdbData with OutPdbFile and returned.
    """
    Pdb = ReturnPdbData(Pdb)
    p = protein.ProteinClass(Pdb=Pdb)
    if ResNum not in range(len(p)):
        raise IndexError("Residue number %d not found" % ResNum)
    try:
        p.RotateToPhiPsi(ResNum, Phi, Psi)
    except StandardError:
        # best effort: report and hand back the input data untouched
        print("Could not perform rotation.")
        return Pdb
    else:
        OutPdb = p.GetPdb()
        SavePdbData(OutPdb, OutPdbFile)
        return OutPdb
def __init__(self, Pdb, Model=None):
    """Wrap a ProteinClass built from Pdb and cache derived attributes.

    Pdb   -- stored as self.Pdb and used to build the internal ProteinClass
    Model -- forwarded to ProteinClass
    """
    self.Pdb = Pdb
    # protected (internal) proteinclass object
    Inner = protein.ProteinClass(Pdb, Model=Model)
    self.__p = Inner

    # attributes copied from the wrapped object
    self.AtomNames = Inner.AtomNames()
    self.AtomRes = Inner.AtomRes()
    # NOTE(review): stored without being called, unlike the two above --
    # confirm this is intended
    self.AtomResNum = Inner.AtomResNum

    # sequence manipulation
    self.UpdateSeq()

    # atom start indices (populated by GetStartInds)
    self.StartInds = []
    self.GetStartInds()

    # backbone indices
    self.BBInds = self.GetBBInds()

    # co-ordinates (populated by UpdatePos)
    self.Pos = None
    self.UpdatePos()
def f(ax1, ax2, pset, fftype, plotnative=True):
    """Plot per-protein 'co' and 'rmsd' averages against sequence length.

    ax1, ax2   -- axes objects; ax1 receives the 'co' curves, ax2 the rmsd
    pset       -- protein names; each needs a native pdb under
                  ~/Go/native_struct/mapped/ and a shelf under
                  Dir/<fftype>/<name>/prot_<name>.shelf
    fftype     -- force-field label; also the key into Clrs for the color
    plotnative -- also draw the native 'co' values on ax1

    Each shelf must hold 'rmsd' (a pair whose first entry is averaged using
    the second as weights) and 'co' (native value plus a pair treated the
    same way). The names sorted by sequence length are printed.
    """
    N = len(pset)
    L = np.zeros(N)
    native_co = np.zeros(N)
    traj_co = np.zeros(N)
    traj_rmsd = np.zeros(N)
    for i, p in enumerate(pset):
        nativepdb = os.path.expanduser('~/Go/native_struct/mapped/%s.pdb' % p)
        prot = protein.ProteinClass(nativepdb)
        L[i] = len(prot.Seq)

        shelf = os.path.join(Dir, fftype, p, 'prot_%s.shelf' % p)
        d = shelve.open(shelf)
        try:
            rmsdhist = d['rmsd']
            nativeco, cohist = d['co']
        finally:
            # close the shelf even when a key is missing
            d.close()

        # weighted means: entry [0] holds the values, entry [1] the weights
        native_co[i] = nativeco
        traj_co[i] = np.sum(cohist[0] * cohist[1]) / np.sum(cohist[1])
        traj_rmsd[i] = np.sum(rmsdhist[0] * rmsdhist[1]) / np.sum(rmsdhist[1])

    ind = np.argsort(L)
    s = ', '.join([pset[i] for i in ind])
    print(s)

    if plotnative:
        ax1.plot(L[ind], native_co[ind], color='blue', marker='o', label='Native')
    ax1.plot(L[ind], traj_co[ind], color=Clrs[fftype], marker='o', label=fftype)
    ax2.plot(L[ind], traj_rmsd[ind], color=Clrs[fftype], marker='o', markersize=8, label=fftype)
    ax1.legend()
    ax2.legend()
def RunAnal(CoordsObj, OutputPath=None, Prefix=None):
    """Run the mesostring analysis over every configuration of CoordsObj.

    Returns (ConfMeso, MesoPop, MesoEntropy):
      ConfMeso    -- one masked mesostring per configuration, in order
      MesoPop     -- (mesostring, count) pairs, most populated first
      MesoEntropy -- log(total) - sum(count * log(count)) / total
    When OutputPath is given the results are also passed to WriteAnalysis.
    """
    # link a proteinclass to the coordinates so PhiPsi follows each frame
    p = protein.ProteinClass()
    p.LinkCoordsObj(CoordsObj)

    # loop through configurations
    ConfMeso = []
    CoordsObj.Reset()
    Mask = MesoMask(p)
    Pos = CoordsObj.GetNextCoords()
    while Pos is not None:
        Dih = [p.PhiPsi(ResInd) for ResInd in range(len(p))]
        ConfMeso.append(MesoStringMask(Dih, Mask))
        Pos = CoordsObj.GetNextCoords()
    p.UnlinkCoordsObj()

    # number of instances of each mesostring
    MesoDict = {}
    for Meso in ConfMeso:
        MesoDict[Meso] = MesoDict.get(Meso, 0) + 1

    # order by population, ties by string, both descending
    MesoPop = sorted(MesoDict.items(),
                     key=lambda Item: (Item[1], Item[0]),
                     reverse=True)

    # mesostate entropy
    Tot = float(len(ConfMeso))
    MesoEntropy = log(Tot) - sum([Count * log(Count) for (Meso, Count) in MesoPop]) / Tot

    # write the output
    ConfIndices = CoordsObj.GetIndices()
    if OutputPath is not None:
        WriteAnalysis(OutputPath, ConfMeso, MesoPop, MesoEntropy, Prefix, ConfIndices)
    return ConfMeso, MesoPop, MesoEntropy
def ReverseMap(CGPdb, Prefix, Model = None, hasPseudoGLY = False):
    """Write <Prefix>.pdb with approximate carbonyl groups rebuilt from a CG pdb.

    For every residue the CG 'N' site is written as N and the CG 'C' site as
    CA. For all but the last residue, Project() supplies carbonyl C and O
    positions from the CA, the CG 'O' site and the next residue's N; the
    last residue keeps its CG 'O' site as O. A side chain site S is written
    for every non-GLY residue, and for GLY too when hasPseudoGLY is set.
    TER records separate chains and close the file.
    """
    # parse coarse grained pdb
    p = protein.ProteinClass(CGPdb, Model = Model)
    Seq = p.Seq
    Pos = p.Pos
    PdbString = p.GetPdb()

    # backbone and sidechain atom indices
    NInds = p.AtomInd(AtomName = 'N')
    CInds = p.AtomInd(AtomName = 'C')
    OInds = p.AtomInd(AtomName = 'O')
    SInds = p.AtomInd(AtomName = 'S')

    Records = []
    Serial = 0          # running atom serial number
    PrevChain = -1
    NextS = 0           # position in SInds of the next side chain site
    LastRes = len(Seq) - 1
    for ResInd, ResName in enumerate(Seq):
        # inter-chain TER record whenever a new chain starts (not at the top)
        Chain = p.ResChain(ResInd)
        if Chain != PrevChain:
            PrevChain = Chain
            if ResInd != 0:
                Records.append("TER\n")

        PosCA = Pos[CInds[ResInd]]
        PosCGO = Pos[OInds[ResInd]]
        Atoms = [('N ', Pos[NInds[ResInd]]), ('CA ', PosCA)]
        if ResInd < LastRes:
            # effective carbonyl from this CA, the CG O site and the next N
            PosC, PosO = Project(PosCA, PosCGO, Pos[NInds[ResInd+1]])
            Atoms.append(('C ', PosC))
            Atoms.append(('O ', PosO))
        else:
            # retain CG O site for last residue
            Atoms.append(('O ', PosCGO))
        if ResName != 'GLY' or hasPseudoGLY:
            Atoms.append(('S ', Pos[SInds[NextS]]))
            NextS += 1

        ChainId = string.ascii_uppercase[Chain]
        for AtomName, Xyz in Atoms:
            Serial += 1
            Records.append(PDBFMT % (Serial, AtomName, ResName, ChainId, ResInd+1, Xyz[0], Xyz[1], Xyz[2], 1.0, 0.0) + '\n')

    # terminal record, then write AA pdb
    Records.append('TER\n')
    OutPdb = Prefix + '.pdb'
    with open(OutPdb, 'w') as of:
        of.write(''.join(Records))
CalcResInd = GetResList(Args.get("calcres", None)) #check for a trajectory if "traj" in Args["FLAGS"]: PdbRef, TrjFile, PrmtopFile = Args["ARGS"][:3] NAvg = int(Args.get("avg", 1)) Cut = float(Args.get("frac", 0.)) NSkip = int(Args.get("nskip", 0)) NRead = int(Args.get("nread", -1)) NStride = int(Args.get("nstride", 1)) Trj = coords.TrjClass(TrjFile, PrmtopFile, NSkip=NSkip, NRead=NRead, NStride=NStride) pTrj = protein.ProteinClass() pTrj.LinkTrj(Trj) pRef = protein.ProteinClass(Pdb=PdbRef) print "%-10s %-8s %-8s %-5s" % ("Frame", "BB_RMSD", "All_RMSD", "NRes") i = 0 y1, y2 = [], [] z1, z2 = [], [] for Pos in Trj: i += 1 x1, NRes = RMSDProteinClass(pRef, pTrj, Backbone=True, AlignSeq=Align, CompResInd=CompResInd, CalcResInd=CalcResInd) x2, NRes = RMSDProteinClass(pRef,
NSkip=NSkip, NStride=NStride) NAtom = len(RefPos) print "Found %d atoms" % NAtom dSq = zeros(NAtom, float) N = 0 for Pos in t: N += 1 r = rmsd.RMSD(RefPos, Pos, Align=True) dSq += ((RefPos - Pos)**2).sum(axis=1) if N % 100 == 0: print "Analyzed %d frames" % N dSq = dSq / N dSq = sqrt(dSq) print "Analyzed %d frames" % N #s = "FLUCTUATION RESULTS\n" #s += "atom number, root-mean-square fluctuation\n" #for i in range(0,NAtom): # s += "%-4d %-8.2f\n" % (i+1, dSq[i]) #file("fluctresults.txt", "w").write(s) p = protein.ProteinClass(Pdb=PdbFile) i = 0 for r in p.Res: for a in r.Atoms: a.BFactor = dSq[i] i += 1 p.WritePdb(os.path.basename(PdbFile).replace(".pdb", "-fluct.pdb"))
#!/usr/bin/env python

import os, sys, numpy as np, cPickle as pickle
import matplotlib ; matplotlib.use('Agg')
import matplotlib.gridspec as gridspec
import matplotlib.cm as cmap
import matplotlib.pyplot as plt

import protein, cgprotein as cg, utils

fftypes = ['ala15', 'leu15', 'val15']
NRes = 15

# create ideal helix pdb
# The residue list is a sequence, so it goes in by keyword; a positional
# first argument is what the other callers use for a pdb path.
p = protein.ProteinClass(Seq = ['ALA'] * NRes)
for i in range(NRes):
    p.RotateToPhiPsi(ResNum = i, Phi = -60, Psi = -45)
p.WritePdb('ideal_helix_unmapped.pdb')
# map.py is given the unmapped pdb and the output prefix ./ideal_helix
cmd = 'python ~/Go/map.py ideal_helix_unmapped.pdb ./ideal_helix'
os.system(cmd)

# get ideal beta-hairpin (take the topclust from val15_AA simulations)
hairpin_pdb = os.path.abspath('../val15_AA/topclust.pdb')

# Trajectories
Traj = {'ala_spc': os.path.abspath('../ala_spc_AA/Lammps/ala_spc.300.00.lammpstrj.gz'),
        'ala15': os.path.abspath('../ala15_AA/Lammps/ala15.299.00.lammpstrj.gz'),
        'leu15': os.path.abspath('../leu15_AA/Lammps/leu15_wham.407.00.lammpstrj.gz'),
        'val15': os.path.abspath('../val15_AA/Lammps/val15_wham.367.00.lammpstrj.gz')
        }

# temperature in each trajectory file name above
Temp = {'ala_spc': 300.0, 'ala15': 299.00, 'leu15': 407.00, 'val15': 367.00}

# reference structure per system: the ideal helix, or the hairpin for val15
NativePdb = {'ala_spc': 'ideal_helix.pdb', 'ala15': 'ideal_helix.pdb', 'leu15': 'ideal_helix.pdb', 'val15': hairpin_pdb}
IndPdb : input pdb Cutoff : cutoff distance to calculate hydrophobic interaction """ import sys, protein, geometry if len(sys.argv) == 1: print Usage sys.exit() #get arguments Pdb = sys.argv[1] Cutoff = int(sys.argv[2]) p = protein.ProteinClass(Pdb=Pdb) #check residues for hydrophobic residues ResInd = p.ResInd(Hydrophobic=True) Pos = p.ResPos() N = len(p) #print Pos #Determine which hydrophobic residues are within the cutoff for i in range(N): if not i in ResInd: continue for j in range(i + 1, N): if not j in ResInd: continue if geometry.Length(Pos[i] - Pos[j]) <= Cutoff: print "%6d %6d %6d" % (i + 1, j + 1, geometry.Length(Pos[i] - Pos[j]))
CCaps = ['NME', 'NHE'] PDBFMT = "ATOM %5d %4s %3s %1s%4d %8.3f%8.3f%8.3f %5.2f%6.2f" # (atomind + 1, atomname, resname, reschainind, resnum+1, x, y, z) BONDFMT = "%6s%5d%5d\n" # ('CONECT', a + r.StartAtom + 1, b + r.StartAtom + 1) # Inputs InPdb = os.path.abspath(sys.argv[1]) CGPrefix = os.path.abspath(sys.argv[2]) if len(sys.argv) > 2 else 'testcg' AATraj = os.path.abspath(sys.argv[3]) if len(sys.argv) > 3 else None PrmTop = os.path.abspath( sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] else None AmberEne = os.path.abspath( sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] else None LastNFrames = int(sys.argv[6]) if len(sys.argv) > 6 else 0 # read in protein structure p = protein.ProteinClass(Pdb=InPdb) p.Update() PdbString = p.GetPdb() Pos = p.Pos # is this structure capped (# adapdted from /share/apps/pdbtools.py) NCap = [ s for s in PdbString.split("\n") if s[0:4] == "ATOM" and s[17:20] in NCaps ] CCap = [ s for s in PdbString.split("\n") if s[0:4] == "ATOM" and s[17:20] in CCaps ] hasNCap = len(NCap) > 0 hasCCap = len(CCap) > 0 # Perform some book-keeping on the sequence
def PdbMesoString(PdbFile):
    """Return the string of mesostates for a pdb file.

    The (phi, psi) pair of every residue is collected from a ProteinClass
    built from PdbFile and passed, with the sequence, to Mesostring.
    """
    p = protein.ProteinClass(Pdb=PdbFile)
    Dih = []
    for ResInd in range(len(p)):
        Dih.append(p.PhiPsi(ResInd))
    return Mesostring(Dih, p.Seq)
def GetCommon(CObj, RunSS=False, Verbose=False):
    """Return residues with common secondary structure, common dihedrals,
    and common contacts across all frames of CObj.

    Returns (Seq, Contacts, ResSS, ResSSFrac, FixedDih):
      Seq       -- sequence after sequence.Standardize
      Contacts  -- (i, j, fraction) for residue pairs at least 3 apart whose
                   averaged contact map entry reaches ContThresh
      ResSS     -- per-residue "H"/"E" where that motif reaches SSThresh,
                   "-" otherwise (all "-" when RunSS is false)
      ResSSFrac -- fraction of frames behind each ResSS entry (0. when RunSS
                   is false)
      FixedDih  -- (i, PhiAvg, PsiAvg, PhiStd, PsiStd) for residues whose
                   phi and psi standard deviations are both <= DihThresh
    """
    if Verbose:
        print("Getting common secondary structures and contacts...")
    SSList, PhiPsiList, PhiPsiSqList = [], [], []
    CM = None
    NFrame = len(CObj)

    #run through all of the frames and tabulate the data
    CObj.Reset()
    p = protein.ProteinClass()
    p.LinkCoordsObj(CObj)
    Seq = p.Seq
    SeqLen = len(Seq)
    for (FrameInd, Pos) in enumerate(CObj):
        if Verbose:
            if "PdbFileList" in CObj.__dict__:
                print("Examining pdb %s" % CObj.PdbFileList[FrameInd])
            elif (FrameInd + 1) % 100 == 0:
                print("Examined %d frames" % (FrameInd + 1))
        if RunSS:
            SSList.append(p.SecondaryStructure())
        PhiPsiList.append(array([p.PhiPsi(ResInd) for ResInd in range(len(p))], float))
        # accumulate the residue contact map over frames
        FrameCM = p.ResContactMap()
        if CM is None:
            CM = FrameCM
        else:
            CM += FrameCM

    #find the most common H or E motif at each point along the chain
    if not RunSS:
        ResSS = "-" * SeqLen
        ResSSFrac = [0.] * SeqLen
    else:
        SSVals = ["H", "E"]
        SSChars, ResSSFrac = [], []
        for ResInd in range(SeqLen):
            Column = [SS[ResInd] for SS in SSList]
            nVals = [Column.count(x) for x in SSVals]
            Best = argmax(nVals)
            Frac = float(nVals[Best]) / float(NFrame)
            ResSSFrac.append(Frac)
            SSChars.append(SSVals[Best] if Frac >= SSThresh else "-")
        ResSS = "".join(SSChars)

    #now find average phi-psi angles for each residue
    FixedDih = []
    for ResInd in range(SeqLen):
        ThisResPhiPsi = array([PhiPsi[ResInd] for PhiPsi in PhiPsiList], float)
        Dev = std(ThisResPhiPsi, axis=0)
        Avg = mean(ThisResPhiPsi, axis=0)
        if all(Dev <= DihThresh):
            FixedDih.append((ResInd, Avg[0], Avg[1], Dev[0], Dev[1]))

    #now compute contacts from the frame-averaged map
    CM = CM / NFrame
    Contacts = []
    # residues to ignore: non-hydrophobic ones, but only when PhobicsOnly is set
    SkipMask = [not sequence.Hydrophobic(x) and PhobicsOnly for x in Seq]
    for i in range(SeqLen):
        if SkipMask[i]:
            continue
        for j in range(i + 3, SeqLen):
            if not SkipMask[j] and CM[i, j] >= ContThresh:
                Contacts.append((i, j, CM[i, j]))

    #summarize
    nums = ("1234567890" * (SeqLen // 10 + 1))[:SeqLen]
    if RunSS:
        s = "CONSENSUS SECONDARY STRUCTURE:\n%s\n%s" % (nums, ResSS)
    else:
        s = ""
    s += "\n\nCONSENSUS DIHEDRALS:\nRes PhiAvg PsiAvg PhiStd PsiStd\n"
    s += "\n".join([
        "%s%d %8.2f %8.2f %8.2f %8.2f" % (Seq[i], i + 1, x, y, z, w)
        for (i, x, y, z, w) in FixedDih
    ])
    s += "\n\nCONSENSUS CONTACTS:\nResi Resj FracMade\n"
    s += "\n".join([
        "%s%d %s%d %8.3f" % (Seq[i], i + 1, Seq[j], j + 1, f)
        for (i, j, f) in Contacts
    ])
    if Verbose:
        print("")
        print(s)

    #get sequence
    Seq = sequence.Standardize(Seq)
    return Seq, Contacts, ResSS, ResSSFrac, FixedDih
def Overlay(NativePdb, Pdb, OutPrefix=None, Label='', SinglePlot=False, hasPseudoGLY=False):
    """Render an overlay of a CG structure on its native structure.

    NativePdb    -- path of the native CG pdb
    Pdb          -- path of the CG pdb to overlay (Model=1 is read)
    OutPrefix    -- prefix for all output and temporary files (default 'go')
    Label        -- label for the single plot; when empty, the backbone RMSD
                    is computed and printed instead
    SinglePlot   -- post-process the rendered image into <OutPrefix>.png
                    with ImageMagick and matplotlib
    hasPseudoGLY -- forwarded to cgprotein.ProteinNCOS and mapNCOS.ReverseMap

    The renderer is selected by the module-level Renderer ('pymol' or
    'vmd'); any other value prints an error and exits the process. All
    temporary files are removed after rendering.
    """
    global doRotate
    if OutPrefix is None:
        OutPrefix = 'go'

    # parse BBInds
    p_cg = cgprotein.ProteinNCOS(NativePdb, hasPseudoGLY=hasPseudoGLY)
    BBInds = p_cg.GetBBInds()

    # read pdbs
    pNative = protein.ProteinClass(NativePdb)
    p = protein.ProteinClass(Pdb, Model=1)

    # rotate the native pdb
    if doRotate:
        pNative = RotateProteinClass(pNative)

    # align with rotated native struct (produces weird results with vmd,
    # so let vmd do its own)
    if not Renderer == 'vmd':
        p, pNative = AlignProtein(p, pNative, BBInds)

    # write to first set of tmp pdb files
    tmpNativePdb = os.path.join(os.getcwd(), '%s_tmpnative.pdb' % OutPrefix)
    tmpPdb = os.path.join(os.getcwd(), '%s_tmp.pdb' % OutPrefix)
    pNative.WritePdb(tmpNativePdb)
    p.WritePdb(tmpPdb)

    # now reverse map these pdbs to generate an approximate carbonyl group
    # so that STRIDE can assign secondary structures (each file is
    # rewritten in place, since the prefix is its own name minus '.pdb')
    mapNCOS.ReverseMap(CGPdb=tmpNativePdb, Prefix=tmpNativePdb.split('.pdb')[0], hasPseudoGLY=hasPseudoGLY)
    mapNCOS.ReverseMap(CGPdb=tmpPdb, Prefix=tmpPdb.split('.pdb')[0], hasPseudoGLY=hasPseudoGLY)

    # fill dictionary for renderer script
    d = {
        'TMPNATIVEPDB': tmpNativePdb,
        'TMPPDB': tmpPdb,
    }

    if Renderer == 'pymol':
        d['FILENAME'] = OutPrefix + '_tmp0.png' if SinglePlot else OutPrefix + '.png'
        tmpPml = OutPrefix + '.pml'
        # close the script before the external program reads it
        with open(tmpPml, 'w') as of:
            of.write(s_pymol % d)
        cmdstr1 = '%s -Qc %s' % (PYMOLEXEC, tmpPml)
        os.system(cmdstr1)
        for x in [tmpNativePdb, tmpPdb, tmpPml]:
            os.remove(x)
    elif Renderer == 'vmd':
        d['FILEPREFIX'] = OutPrefix + '_tmp0' if SinglePlot else OutPrefix
        d['TACHYONEXEC'] = TACHYONEXEC
        tmpTcl = OutPrefix + '.tcl'
        # close the script before the external program reads it
        with open(tmpTcl, 'w') as of:
            of.write(s_vmd % d)
        cmdstr1 = '%s -dispdev text -eofexit -e %s > /dev/null 2>&1' % (
            VMDEXEC, tmpTcl)
        os.system(cmdstr1)
        for x in [tmpNativePdb, tmpPdb, tmpTcl, d['FILEPREFIX']]:
            os.remove(x)
    else:
        print('ERROR: Renderer not found')
        exit()

    # single plot with supplied labels (mostly when used from the command line)
    if not SinglePlot:
        return

    if not Label:
        rmsd = sim.geom.RMSD(pNative.Pos[BBInds], p.Pos[BBInds])
        print(rmsd)
        Label = r'$RMSD = %2.2f \AA$' % rmsd

    # the renderers leave different image types behind
    if Renderer == 'pymol':
        pic0 = OutPrefix + '_tmp0.png'
    else:
        pic0 = OutPrefix + '_tmp0.tga'
    pic1 = OutPrefix + '_tmp1.png'
    cmdstr2 = '%s %s -trim -bordercolor white -background white -border 50x50 -quality 100 %s' % (
        IMAGEMAGICKEXEC, pic0, pic1)
    os.system(cmdstr2)

    pic = mpimg.imread(pic1)
    fig = plt.figure(figsize=(5, 5), facecolor='w', edgecolor='w')
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(pic, aspect='auto')
    #ax.set_title(Label, fontsize = 8)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    figname = OutPrefix + '.png'
    plt.savefig(figname, bbox_inches='tight')
    for x in [pic0, pic1]:
        os.remove(x)
    return
def Map(InPdb, CGPrefix, Model = None, AATraj = None, PrmTop = None, AmberEne = None, LastNFrames = 0, hasPseudoGLY = True):
    ''' Maps an all-atom pdb to a CG N-C-O-S version and writes
    <CGPrefix>.pdb. If pseudo GLY side chains are requested, automatically
    hydrogenates GLY residues that don't have alpha hydrogens.

    InPdb        -- all-atom pdb to map
    CGPrefix     -- prefix for every output file
    Model        -- forwarded to ProteinClass when reading InPdb
    AATraj       -- optional all-atom trajectory; when given it is mapped
                    and written to <CGPrefix>.lammpstrj.gz
    PrmTop       -- when given, AATraj is read as an Amber trajectory with
                    this topology; otherwise as a Lammps trajectory
    AmberEne     -- optional Amber energy file; when given the extracted
                    energies are written to <CGPrefix>.ene.dat.gz
    LastNFrames  -- when non-zero, keep only this many trailing frames /
                    energies
    hasPseudoGLY -- give GLY a side chain site built from one alpha hydrogen

    CG sites per residue: N (amide N), C (alpha carbon), O (midpoint of
    carbonyl C and O) and S (mean position of the side chain atoms).'''
    if hasPseudoGLY:
        print('Using pseudo Glycines')

    # read in protein structure
    p = protein.ProteinClass(Pdb = InPdb, Model = Model)
    p.Update()

    # hydrogenate glycines that don't have hydrogens
    if hasPseudoGLY:
        AddH_GLY(p)

    # extract the all-atom pdb string
    PdbString = p.GetPdb()
    Pos = p.Pos

    # is this structure capped (adapted from /share/apps/pdbtools.py)
    NCap = [s for s in PdbString.split("\n") if s[0:4]=="ATOM" and s[17:20] in NCaps]
    CCap = [s for s in PdbString.split("\n") if s[0:4]=="ATOM" and s[17:20] in CCaps]
    hasNCap = len(NCap) > 0
    hasCCap = len(CCap) > 0

    # Perform some book-keeping on the sequence: Seq excludes the caps, and
    # startres converts an index into Seq to a residue number of p
    Seq = p.Seq
    if hasNCap: Seq = Seq[1:]
    if hasCCap: Seq = Seq[:-1]
    startres = 1 if hasNCap else 0
    ResCount = dict( (x,Seq.count(x)) for x in set(Seq) )
    if 'GLY' in ResCount.keys() and not hasPseudoGLY:
        # glycines contribute 3 sites (no S), everything else 4
        NCGAtoms = 3 * ResCount['GLY'] + 4 * ( sum(ResCount.values()) - ResCount['GLY'] )
    else:
        NCGAtoms = 4 * len(Seq)

    # Masks
    DecapFilter = lambda ResName, AtomName : not ( ResName in (NCaps + CCaps) )
    SFilter_Other = lambda ResName, AtomName : not (AtomName == 'N' or AtomName == 'CA' or AtomName == 'C' or AtomName == 'O' or 'H' in AtomName)
    SFilter_GLY = lambda ResName, AtomName: (ResName == 'GLY' and 'HA' in AtomName)
    # "f and g" on two functions just evaluates to g, so the combined masks
    # are spelled out as real conjunctions of both predicates
    SideFilter_Other = lambda ResName, AtomName : DecapFilter(ResName, AtomName) and SFilter_Other(ResName, AtomName)
    SideFilter_GLY = lambda ResName, AtomName : DecapFilter(ResName, AtomName) and SFilter_GLY(ResName, AtomName)

    # Parse atom indices based on masks
    NInds = p.AtomInd(AtomName = 'N', UserFunc = DecapFilter)
    CAInds = p.AtomInd(AtomName = 'CA', UserFunc = DecapFilter)
    CInds = p.AtomInd(AtomName = 'C', UserFunc = DecapFilter)
    OInds = p.AtomInd(AtomName = 'O', UserFunc = DecapFilter)
    SInds = dict( (i, []) for i in range(len(Seq)) )
    for i, r in enumerate(Seq):
        if r == 'GLY' and hasPseudoGLY:
            # sidechain hydrogens for GLY
            this_SInds = p.AtomInd(ResNum = startres + i, UserFunc = SideFilter_GLY)
            # find out which hydrogen is prochiral-Si
            ind_H = findProchiralH_GLY(PosN = Pos[NInds[i]], PosCA = Pos[CAInds[i]], PosC = Pos[CInds[i]],
                                       PosH = ( Pos[this_SInds[0]], Pos[this_SInds[1]] ) )
            SInds[i] = [this_SInds[ind_H]]
        else:
            SInds[i] = p.AtomInd(ResNum = startres + i, UserFunc = SideFilter_Other)

    # Write Masks to a Map and final CG atoms to CG PDB
    CurrentChain = -1
    PdbLines = []       # ATOM / TER records
    BondLines = []      # CONECT records
    MapDict = dict( (i, []) for i in range(NCGAtoms) )
    n = 0               # 0-based index of the next CG atom; serial is n+1
    for i, r in enumerate(Seq):
        # determine if a new chain starts here (i indexes the decapped
        # sequence, so shift by startres like the AtomInd calls above)
        thisChain = p.ResChain(startres + i)
        ChainId = string.ascii_uppercase[thisChain]
        NewChain = not thisChain == CurrentChain
        if NewChain and not i == 0:
            PdbLines.append("TER\n") # inter-chain TER records

        # N lines (do not bond when starting a new chain): bond the previous
        # residue's O site to this N; the O is the last atom written when
        # the previous residue has no S site, otherwise the one before that
        if not NewChain:
            if Seq[i-1] == 'GLY' and not hasPseudoGLY:
                BondLines.append(BONDFMT % ('CONECT', n, n+1))
            else:
                BondLines.append(BONDFMT % ('CONECT', n-1, n+1))
        N_CGInd = n ; n += 1
        MapDict[N_CGInd].append(NInds[i])
        PdbLines.append(PDBFMT % (N_CGInd+1, 'N ', r, ChainId, i+1, Pos[NInds[i], 0], Pos[NInds[i], 1], Pos[NInds[i], 2], 1.0, 0.0) + "\n")

        # now update chains
        CurrentChain = thisChain

        # C lines (bonded to the N just written)
        BondLines.append(BONDFMT % ('CONECT', n, n+1))
        C_CGInd = n ; n += 1
        MapDict[C_CGInd].append(CAInds[i])
        PdbLines.append(PDBFMT % (C_CGInd+1, 'C ', r, ChainId, i+1, Pos[CAInds[i], 0], Pos[CAInds[i], 1], Pos[CAInds[i], 2], 1.0, 0.0) + "\n")

        # O lines (bonded to the C just written)
        BondLines.append(BONDFMT % ('CONECT', n, n+1))
        O_CGInd = n ; n += 1
        MapDict[O_CGInd].extend([ CInds[i], OInds[i] ])
        COMPos = (Pos[CInds[i]] + Pos[OInds[i]]) / 2.
        PdbLines.append(PDBFMT % (O_CGInd+1, 'O ', r, ChainId, i+1, COMPos[0], COMPos[1], COMPos[2], 1.0, 0.0) + "\n")

        # S lines (bonded to the C site, two atoms back, when genBonds == 2)
        if not r == 'GLY' or hasPseudoGLY:
            if genBonds == 2:
                BondLines.append(BONDFMT % ('CONECT', n-1, n+1))
            S_CGInd = n ; n += 1
            MapDict[S_CGInd].extend(SInds[i])
            COMPos = np.mean(Pos[SInds[i]], axis = 0)
            PdbLines.append(PDBFMT % (S_CGInd+1, 'S ', r, ChainId, i+1, COMPos[0], COMPos[1], COMPos[2], 1.0, 0.0) + "\n")

    # Write CG PDB
    PdbLines.append("TER\n") # last TER record
    if genBonds:
        PdbLines.extend(BondLines)
    OutPdb = CGPrefix + '.pdb'
    with open(OutPdb, 'w') as of:
        of.write(''.join(PdbLines))

    # Map Traj
    if not AATraj is None:
        import sim
        simMap = sim.atommap.PosMap()
        for i in range(NCGAtoms):
            simMap += [sim.atommap.AtomMap(Atoms1 = MapDict[i], Atom2 = i)]
        AtomNames = []
        for r in Seq:
            if r == 'GLY' and not hasPseudoGLY:
                AtomNames.extend(['N', 'C', 'O'])
            else:
                AtomNames.extend(['N', 'C', 'O', 'S'])

        print('Reading from AA Traj...')
        if PrmTop is None:
            Trj = sim.traj.lammps.Lammps(AATraj) # LammpsTraj
        else:
            Trj = sim.traj.amber.Amber(AATraj, PrmTop) # ZamTraj
        # NOTE(review): the first frame is read before FrameData is
        # inspected; presumably this is what populates it -- confirm
        tmpinit = Trj[0]
        if 'BoxL' in Trj.FrameData:
            BoxL = Trj.FrameData['BoxL']
        else:
            BoxL = [0., 0., 0.]
        # same text as a two-item print statement: items joined by a space
        print('Using Box:  %s' % (BoxL,))

        print('Writing to CG Lammps Traj...')
        if LastNFrames:
            print('Read %d frames, writing last %d frames' % (len(Trj), LastNFrames))
        # Note: the entire traj must be mapped to avoid file write errors
        CGTraj = CGPrefix + '.lammpstrj.gz'
        MappedTrj = sim.traj.mapped.Mapped(Trj, simMap, AtomNames = AtomNames, BoxL = BoxL)
        # now parse out only the necessary portion of the mapped traj
        if LastNFrames:
            MappedTrj = MappedTrj[-LastNFrames:]
        # now convert to Lammps
        sim.traj.base.Convert(MappedTrj, sim.traj.LammpsWrite, CGTraj, Verbose = True)

    # Write Ene File
    if not AmberEne is None:
        # imported here as well, so this branch also works when no
        # trajectory was given
        import sim
        print('Converting Amber Ene File...')
        CGEne = CGPrefix + '.ene.dat.gz'
        of = sim.traj.base.FileOpen(AmberEne, "rb")
        try:
            lines = of.readlines()
        finally:
            of.close()
        # skip the first 10 lines; the energy is field 2 of each "L6" line
        start = 10 ; enefield_loc = (6,2)
        EneTag = 'L%d' % enefield_loc[0]
        Ene = []
        for line in lines[start:]:
            l = line.split()
            # blank lines have no fields to compare
            if l and l[0] == EneTag:
                Ene.append(float(l[enefield_loc[1]]))
        # parse necessary portion of Ene
        if LastNFrames:
            Ene = Ene[-LastNFrames:]
        np.savetxt(CGEne, Ene)
    return