def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: object state handed to cmd.get_pdbstr when exporting the
    coordinates {default: -1}

    hydrogens = string: "auto" picks the MMTK model "all" if the selection
    contains hydrogens and "no_hydrogens" otherwise; "none" is an alias
    for "no_hydrogens"; any other value is passed to createPeptideChains
    unchanged {default: auto}

    quiet = 0/1: if 0, print area and volume {default: 1}

RETURNS

    The surface area, scaled to Angstroms^2.

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    # The import is only an availability check; the names actually used are
    # imported from the submodules below.
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Honour the requested state; previously the argument was parsed but the
    # export always fell back to get_pdbstr's own default state.
    pdbstr = cmd.get_pdbstr(selection, state)
    conf = PDBConfiguration(StringIO(pdbstr))
    system = Protein(conf.createPeptideChains(hydrogens))

    try:
        # The probe radius is scaled by 0.1 and the results by 1e2 / 1e3
        # below (Angstrom-based PyMOL values <-> the units MMTK works in).
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # "except Exception" instead of a bare except, so that Ctrl-C and
        # SystemExit are not turned into a CmdException.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' % (area * 1e2, volume * 1e3))
    return area * 1e2
# A utility function that creates an image of an object by making # a copy and applying a transformation to the copy. def makeImage(object, transformation): image = deepcopy(object) for atom in image.atomList(): atom.setPosition(transformation(atom.position())) return image # Read PDB configuration and create MMTK objects for all peptide chains. # A C-alpha model is used to reduce the system size. You can remove # 'model="calpha"' to get an all-atom model, but for insulin this will # create more than 380000 atoms for the 27-unit-cell crystal! conf = PDBConfiguration('insulin.pdb') chains = Collection(conf.createPeptideChains(model="calpha")) # Apply non-crystallographic symmetries to construct the asymmetric unit asu = Collection(chains) for so in conf.ncs_transformations: if not so.given: image = makeImage(chains, so) asu.addObject(image) # Apply crystallographic symmetries to construct the unit cell # Note that the list of crystallographic symmetries includes the # identity transformation, so the unmodified asu is not added # to the unit cell. cell = Collection() for so in conf.cs_transformations: image = makeImage(asu, so)
def generate_ramachandran(pdb_id):
    """Draw a Ramachandran plot for one PDB entry and return an HTML <img> tag.

    The structure is read from "../data/<pdb_id>.pdb" with MMTK, the phi/psi
    angles of every residue are collected per Ramachandran type, and the
    "General" residues are plotted with R (through the module-level ``r``
    object) on top of the "General" top500 density contours.

    Side effects: loads the R library MASS, defines the R function
    ``ramachandran.plot`` and writes a PNG file with a randomised name
    into "../tmp/".

    pdb_id -- identifier used to build the input path, the PNG file name
              and the plot title.

    Returns the string '<img src="/~jean/projet/tmp/<png file name>"/>'.
    """
    rama_GENERAL = "General"
    rama_GLYCINE = "Glycine"
    rama_PROLINE = "Proline"
    rama_PRE_PRO = "Pre-Pro"
    ramachandran_types = [rama_GENERAL, rama_GLYCINE, rama_PROLINE, rama_PRE_PRO]

    # I have used the same colours as RAMPAGE
    # http://raven.bioc.cam.ac.uk/rampage.php
    # Each entry is (contour levels, fill colours, density table file).
    rama_settings = {
        "General": ([0, 0.0005, 0.02, 1],
                    ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                    "top500angles/pct/rama/rama500-general.data"),
        # or rama500-general-nosec.data
        "Glycine": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#FFE8C5', '#FFCC7F'],
                    "top500angles/pct/rama/rama500-gly-sym.data"),
        # or rama500-gly-sym-nosec.data
        "Proline": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#D0FFC5', '#7FFF8C'],
                    "top500angles/pct/rama/rama500-pro.data"),
        "Pre-Pro": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                    "top500angles/pct/rama/rama500-prepro.data"),
    }
    # P.S. Also rama500-ala-nosec.data

    def load_data_file(filename):
        """Read a top500 density table into a square array.

        Returns (data, lower_bounds, mid_points, upper_bounds); data[i, j]
        holds the value for phi == mid_points[i] and psi == mid_points[j].

        Expected file layout (lines starting with '#' are comments)::

            # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
            # Number of dimensions: 2
            # For each dimension, 1 to 2: lower_bound upper_bound number_of_bins wrapping
            #   x1: -180.0 180.0 180 true
            #   x2: -180.0 180.0 180 true
            # List of table coordinates and values. (Value is last number on each line.)
            -179.0 -179.0 0.0918642445114388
            -179.0 -177.0 0.07105717866463215
            ...
        """
        HALF_STEP = 1
        STEP = HALF_STEP * 2
        lower_bounds = range(-180, 180, STEP)
        mid_points = range(-180 + HALF_STEP, 180 + HALF_STEP, STEP)
        upper_bounds = range(-180 + STEP, 180 + STEP, STEP)

        data = numpy.zeros((len(mid_points), len(mid_points)))

        # "with" guarantees the file is closed even if a line is malformed.
        with open(filename, "r") as input_file:
            for line in input_file:
                # Strip the newline character(s) from the end of the line
                line = line.rstrip("\r\n")
                # Skip comments; also skip blank lines, which used to raise
                # IndexError on the line[-1] / line[0] look-ups.
                if not line.strip() or line.startswith("#"):
                    continue
                parts = line.split()
                assert len(parts) == 3
                x1 = float(parts[0])  # phi
                x2 = float(parts[1])  # psi
                value = float(parts[2])
                # Bin centres are expected to be whole degrees.
                assert x1 == float(int(x1))
                assert x2 == float(int(x2))
                i1 = mid_points.index(int(x1))
                i2 = mid_points.index(int(x2))
                data[i1, i2] = value
        return (data, lower_bounds, mid_points, upper_bounds)

    r.library("MASS")
    # Define the R plotting helper: filled density contours with an optional
    # phi/psi scatter overlay.
    r("""
ramachandran.plot <- function(x.scatter, y.scatter,
    x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
    xlim = range(x.grid, finite = TRUE),
    ylim = range(y.grid, finite = TRUE),
    zlim = range(z.grid, finite = TRUE),
    levels = pretty(zlim, nlevels), nlevels = 20,
    color.palette = cm.colors, col = color.palette(length(levels) - 1),
    plot.title="", plot.axes, key.title, key.axes,
    asp = NA, xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes, ...)
{
    if (missing(z.grid)) {
        stop("no 'z.grid' matrix specified")
    } else if (is.list(x.grid)) {
        y.grid <- x.grid$y
        x.grid <- x.grid$x
    }
    if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
        stop("increasing 'x.grid' and 'y.grid' values expected")

    plot.new()
    plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)

    if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
        stop("no proper 'z.grid' matrix specified")
    if (!is.double(z.grid))
        storage.mode(z.grid) <- "double"
    .filled.contour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels), col = col)

    if (!(missing(x.scatter)) && !(missing(y.scatter))) {
        plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
                xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
                type="p", pch=20, cex=0.1)
    }

    if (missing(plot.axes)) {
        if (axes) {
            title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
            axis(1, at=c(-180,-90,0,90,180))
            axis(2, at=c(-180,-90,0,90,180))
        }
    } else plot.axes

    if (frame.plot)
        box()
    if (missing(plot.title)) title(...) else plot.title
    invisible()
}
""")

    def degrees(rad_angle):
        """Converts an angle in radians to degrees, mapped to the range [-180,180]"""
        angle = rad_angle * 180 / math.pi
        # Note this assumes the radians angle is positive as that's what MMTK does
        while angle > 180:
            angle = angle - 360
        return angle

    def next_residue(residue):
        """Expects an MMTK residue, returns the next residue in the chain, or None"""
        # Proteins go N terminal --> C terminal
        # The next residue is bonded to the C of this atom...
        for a in residue.peptide.C.bondedTo():
            if a.parent.parent != residue:
                return a.parent.parent
        return None

    def residue_amino(residue):
        """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
        if residue:
            return residue.name[0:3].upper()
        else:
            return None

    def residue_ramachandran_type(residue):
        """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
        if residue_amino(residue) == "GLY":
            return rama_GLYCINE
        elif residue_amino(residue) == "PRO":
            return rama_PROLINE
        elif residue_amino(next_residue(residue)) == "PRO":
            # excludes those that are Pro or Gly
            return rama_PRE_PRO
        else:
            return rama_GENERAL

    # One list of phi values and one of psi values per Ramachandran type.
    scatter_phi = dict((rama_type, []) for rama_type in ramachandran_types)
    scatter_psi = dict((rama_type, []) for rama_type in ramachandran_types)

    pdb_filename = "../data/%s.pdb" % pdb_id

    # Load the PDB file, ignore the hydrogens, and then build a model of the peptides:
    configuration = PDBConfiguration(pdb_filename)
    configuration.deleteHydrogens()
    protein = Protein(configuration.createPeptideChains(model="no_hydrogens"))
    for chain in protein:
        for residue in chain:
            phi, psi = residue.phiPsi()
            # Compare against None explicitly: a truthiness test would also
            # discard an angle that happens to be exactly 0.0.
            if phi is not None and psi is not None:
                ramachandran_type = residue_ramachandran_type(residue)
                assert ramachandran_type in ramachandran_types
                scatter_phi[ramachandran_type].append(degrees(phi))
                scatter_psi[ramachandran_type].append(degrees(psi))
    assert len(scatter_phi) == len(scatter_psi)

    png_filename = "ppii%d%s.png" % (random.randint(0, 1000000), pdb_id)
    png_filepath = "../tmp/" + png_filename
    png_command = 'png("' + png_filepath + '")'
    # Open the PNG device. The only use of png_command had been commented
    # out together with its print, so the file referenced by the returned
    # <img> tag was never written.
    r(png_command)

    # Only a single plot is produced. Use one type for both the contour table
    # and the scatter points; the scatter data used to be indexed with
    # whatever type the last residue in the loop above happened to have.
    plot_type = rama_GENERAL
    (rama_levels, rama_colors, rama_filename) = rama_settings[plot_type]
    data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)

    r.ramachandran_plot(x_scatter=scatter_phi[plot_type],
                        y_scatter=scatter_psi[plot_type],
                        x_grid=mid_points, y_grid=mid_points, z_grid=data,
                        xlim=[-180, 180], ylim=[-180, 180], asp=1.0,
                        plot_title="Ramachandran plot of " + pdb_id,
                        drawlabels=False,
                        levels=rama_levels, col=rama_colors)
    # Close the device so the PNG is flushed to disk.
    r("dev.off()")
    return '<img src="/~jean/projet/tmp/' + png_filename + '"/>'
def normalmodes_mmtk(selection, cutoff=12.0, ff='Deformation', first=7, last=10,
        prefix='mmtk', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Fast normal modes for large proteins using an elastic network model (CA only)

    Based on:
    http://dirac.cnrs-orleans.fr/MMTK/using-mmtk/mmtk-example-scripts/normal-modes/

ARGUMENTS

    selection = string: atom selection; restricted to "polymer and name CA"
    unless the Amber94 force field is chosen

    cutoff = float: cutoff for the Deformation and Calpha force fields,
    divided by 10 before it is handed to MMTK {default: 12.0}

    ff = string: force field, any prefix of "DeformationForceField",
    "CalphaForceField" or "Amber94ForceField" (case insensitive)
    {default: Deformation}

    first = int: first mode to create an object for {default: 7}

    last = int: last mode to create an object for, limited to the number
    of computed modes {default: 10}

    prefix = string: name prefix of the created objects {default: mmtk}

    states = int: number of states per mode object; must be larger than 1,
    the displacement is spread over (states - 1) steps {default: 7}

    factor = float: displacement scaling; a negative value selects
    log(number of atoms) {default: -1}

    quiet = 0/1: if 0, print progress information {default: 1}
    '''
    # The import is only an availability check; the names actually used are
    # imported from the submodules below.
    try:
        import MMTK
    except ImportError:
        print('Failed to import MMTK, please add to PYTHONPATH')
        raise CmdException

    selection = selector.process(selection)
    cutoff = float(cutoff)
    first, last = int(first), int(last)
    states, factor, quiet = int(states), float(factor), int(quiet)

    import os
    import tempfile
    from math import log

    # StringIO was used without a visible import; import it locally so the
    # command does not depend on a module-level name.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    from chempy import cpv

    from MMTK import DCD
    from MMTK import InfiniteUniverse
    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.ForceFields import DeformationForceField, CalphaForceField
    from MMTK.FourierBasis import FourierBasis, estimateCutoff
    from MMTK.NormalModes import NormalModes, SubspaceNormalModes

    model = 'calpha'
    ff = ff.lower()
    # Prefix matching: e.g. "def", "calpha" or "amber" are all accepted.
    if 'deformationforcefield'.startswith(ff):
        forcefield = DeformationForceField(cutoff=cutoff / 10.)
    elif 'calphaforcefield'.startswith(ff):
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print(' Forcefield:', forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    # Floor division keeps the basis size an integer, also under Python 3
    # style true division.
    nbasis = max(10, universe.numberOfAtoms() // 5)
    fourier_cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if fourier_cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, fourier_cutoff)
        modes = SubspaceNormalModes(universe, subspace)

    natoms = modes.array.shape[1]
    frequencies = modes.frequencies

    if factor < 0:
        factor = log(natoms)
        if not quiet:
            print(' set factor to %.2f' % (factor))

    # Round-trip the universe through a PDB file so that PyMOL holds exactly
    # the atoms MMTK built. mkstemp avoids the race inherent in mktemp, and
    # the file is removed even if writing or loading fails.
    tmp_object = cmd.get_unused_name('_')
    fd, filename = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        sequence = DCD.writePDB(universe, None, filename)
        cmd.load(filename, tmp_object, zoom=0)
    finally:
        os.remove(filename)

    # Index of each written atom, in file order; used to pick the matching
    # rows of a mode's displacement array.
    z = [a.index for a in sequence]

    def eigenfacs_iter(mode):
        # Iterate over the displacement vectors of a mode in file order
        x = modes[mode - 1].array
        return iter(x.take(z, 0))

    try:
        if cmd.count_atoms(tmp_object) != natoms:
            print('hmm... still wrong number of atoms')

        for mode in range(first, min(last, len(modes)) + 1):
            name = prefix + '%d' % mode
            cmd.delete(name)

            if not quiet:
                print(' normalmodes: object "%s" for mode %d with freq. %.6f' % \
                        (name, mode, frequencies[mode-1]))

            for state in range(1, states + 1):
                cmd.create(name, tmp_object, 1, state, zoom=0)
                # The displacement runs linearly from -0.5 to +0.5 times
                # the scaled mode vector across the states.
                cmd.alter_state(
                    state, name,
                    '(x,y,z) = cpv.add([x,y,z], cpv.scale(next(myit), myfac))',
                    space={
                        'cpv': cpv,
                        'myit': eigenfacs_iter(mode),
                        'next': next,
                        'myfac': 1e2 * factor * ((state - 1.0) / (states - 1.0) - 0.5)
                    })
    finally:
        # Never leave the temporary object behind, even on error.
        cmd.delete(tmp_object)

    if model == 'calpha':
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
elif 'amber94forcefield'.startswith(ff): from MMTK.ForceFields import Amber94ForceField forcefield = Amber94ForceField() model = 'all' else: raise NotImplementedError('unknown ff = ' + str(ff)) if not quiet: print ' Forcefield:', forcefield.__class__.__name__ if model == 'calpha': selection = '(%s) and polymer and name CA' % (selection) from cStringIO import StringIO f = StringIO(cmd.get_pdbstr(selection)) conf = PDBConfiguration(f) items = conf.createPeptideChains(model) universe = InfiniteUniverse(forcefield) universe.protein = Protein(*items) nbasis = max(10, universe.numberOfAtoms()/5) cutoff, nbasis = estimateCutoff(universe, nbasis) if not quiet: print " Calculating %d low-frequency modes." % nbasis if cutoff is None: modes = NormalModes(universe) else: subspace = FourierBasis(universe, cutoff) modes = SubspaceNormalModes(universe, subspace)
# # Note that this will not necessarily work with any PDB file. Many files # use non-crystallographic symmetry information in a non-standard way. # This is usually explained in REMARK records, but those cannot be # evaluated automatically. # from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import Protein # Read PDB configuration and create MMTK objects for all peptide chains. # A C-alpha model is used to reduce the system size. You can remove # 'model="calpha"' to get an all-atom model. conf = PDBConfiguration('insulin.pdb') chains = Collection(conf.createPeptideChains(model="calpha")) # Copy and transform the objects representing the asymmetric unit in order # to obtain the contents of the unit cell. chains = conf.asuToUnitCell(chains) # Construct a periodic universe representing the unit cell. universe = conf.createUnitCellUniverse() # Add each chain as one protein. If the unit cell contains multimers, # the chains must be combined into protein objects by hand, # as no corresponding information can be extracted from the PDB file. for chain in chains: universe.addObject(Protein(chain)) # If VMD has been defined as the PDB viewer, this will not only show
start = time.time() # # First problem: construct an all-atom model from a structure without # hydrogens. This is the standard problem when using an all-atom force # field with crystallographic structures. # # # Load the PDB file. configuration = PDBConfiguration('insulin.pdb') # Construct the peptide chain objects. This also constructs positions # for any missing hydrogens, using geometrical criteria. chains = configuration.createPeptideChains() # Make the protein object. #insulin = Protein(chains) # Define system universe = InfiniteUniverse(Amber99ForceField(mod_files=['frcmod.ff99SB'])) universe.protein = Protein(chains) # Initialize velocities universe.initializeVelocitiesToTemperature(50. * Units.K) print 'Temperature: ', universe.temperature() print 'Momentum: ', universe.momentum() print 'Angular momentum: ', universe.angularMomentum() file.write('Temperature: ' + str(universe.temperature()) + "\n") file.write('Momentum: ' + str(universe.momentum()) + "\n")
forcefield = CalphaForceField(cutoff=cutoff / 10.) elif 'amber94forcefield'.startswith(ff): from MMTK.ForceFields import Amber94ForceField forcefield = Amber94ForceField() model = 'all' else: raise NotImplementedError('unknown ff = ' + str(ff)) if not quiet: print(' Forcefield:', forcefield.__class__.__name__) if model == 'calpha': selection = '(%s) and polymer and name CA' % (selection) f = StringIO(cmd.get_pdbstr(selection)) conf = PDBConfiguration(f) items = conf.createPeptideChains(model) universe = InfiniteUniverse(forcefield) universe.protein = Protein(*items) nbasis = max(10, universe.numberOfAtoms() / 5) cutoff, nbasis = estimateCutoff(universe, nbasis) if not quiet: print(" Calculating %d low-frequency modes." % nbasis) if cutoff is None: modes = NormalModes(universe) else: subspace = FourierBasis(universe, cutoff) modes = SubspaceNormalModes(universe, subspace)
# # First problem: construct an all-atom model from a structure without # hydrogens. This is the standard problem when using an all-atom force # field with crystallographic structures. # # Note: the simple solution in this case is just # insulin = Protein('insulin.pdb') # but the explicit form shown below is necessary when any kind of # modification is required. # # Load the PDB file. configuration = PDBConfiguration('insulin.pdb') # Construct the peptide chain objects. This also constructs positions # for any missing hydrogens, using geometrical criteria. chains = configuration.createPeptideChains() # Make the protein object. insulin = Protein(chains) # Write out the structure with hydrogens to a new file - we will use # it as an input example later on. insulin.writeToFile('insulin_with_h.pdb') # # Second problem: read a file with hydrogens and create a structure # without them. This is useful for analysis; if you don't need the # hydrogens, processing is faster without them. Or you might want # to compare structures with and without hydrogens. #
# # First problem: construct an all-atom model from a structure without # hydrogens. This is the standard problem when using an all-atom force # field with crystallographic structures. # # Note: the simple solution in this case is just # insulin = Protein('insulin.pdb') # but the explicit form shown below is necessary when any kind of # modification is required. # # Load the PDB file. configuration = PDBConfiguration('insulin.pdb') # Construct the peptide chain objects. This also constructs positions # for any missing hydrogens, using geometrical criteria. chains = configuration.createPeptideChains() # Make the protein object. insulin = Protein(chains) # Write out the structure with hydrogens to a new file - we will use # it as an input example later on. insulin.writeToFile('insulin_with_h.pdb') # # Second problem: read a file with hydrogens and create a structure # without them. This is useful for analysis; if you don't need the # hydrogens, processing is faster without them. Or you might want # to compare structures with and without hydrogens. # # Load the PDB file.