def __init__(self, molid, **kwargs):
    """
    Sets up a CHARMM writer for one VMD molecule.

    Args:
        molid (int): VMD molecule ID of system to write
        tmp_dir (str): Directory for temporary files. Defaults to "."
        lipid_sel (str): Lipid selection string. Defaults to "lipid"
        hmr (bool): If hydrogen masses should be repartitioned. Defaults
            to False
        forcefield (str): Forcefield to use, either "charmm" or "amber"
        water_model (str): Water model to use
        extra_topos (list of str): Additional topology (.str, .off, .lib) to
            include.
        extra_params (list of str): Additional parameter sets (.str, .frcmod)
        override_defaults (bool): If set, omits default forcefield
            parameters.
        debug_verbose (bool): Prints additional output, like from psfgen.

    Raises:
        DabbleError: if HMR is requested together with a CHARMM forcefield
    """
    # The parent constructor handles the shared options
    super(CharmmWriter, self).__init__(molid, **kwargs)

    # Handle to the psf generator used for all building steps
    self.psfgen = PsfGen()

    # Start from the default topology/parameter sets of the forcefield
    self.forcefield = kwargs.get("forcefield", "charmm")
    self.water_model = kwargs.get("water_model", "tip3")
    self.topologies = self.get_topologies(self.forcefield, self.water_model)
    self.parameters = self.get_parameters(self.forcefield, self.water_model)

    if "charmm" in self.forcefield and self.hmr:
        raise DabbleError("HMR not supported with CHARMM ff yet")

    # An override discards the defaults gathered above
    if self.override:
        self.topologies = []
        self.parameters = []

    # User-supplied extras (put in self by super __init__) always go last
    self.topologies.extend(self.extra_topos)
    self.parameters.extend(self.extra_params)

    # The matcher is only built once the topology list is final, and only
    # for CHARMM-style topologies (not when doing a conversion)
    if any(ff in self.forcefield for ff in ("charmm", "opls")):
        self.matcher = CharmmMatcher(self.topologies)

    # Counter used to number segments for protein and other blocks
    self.segint = 0
def test_formats(tmpdir):
    """
    Round-trips a two segment protein through psf and namdbin files
    """
    from psfgen import PsfGen

    outdir = str(tmpdir.mkdir("formats"))
    psf_path = os.path.join(outdir, "pdbin.psf")
    bin_path = os.path.join(outdir, "pdbin.bin")
    topologies = ["top_all36_caps.rtf", "top_all36_prot.rtf"]
    segments = [
        ("P0", "psf_protein_P0.pdb"),
        ("P1", "psf_protein_P1.pdb"),
    ]

    # Build the system from the input files
    os.chdir(dir)
    writer = PsfGen(output=os.devnull)
    for topfile in topologies:
        writer.read_topology(topfile)
    for segid, pdbfile in segments:
        writer.add_segment(segid=segid, pdbfile=pdbfile)
        writer.read_coords(segid=segid, filename=pdbfile)

    # Dump it as a PSF plus a NAMD binary coordinate file
    writer.write_psf(filename=psf_path)
    writer.write_namdbin(filename=bin_path)
    del writer

    # Load both files into a fresh generator. The topology files are
    # expected to come back along with the PSF. The coordinate file is
    # passed a second time as velocities so that code path runs as well.
    reader = PsfGen(output=os.devnull)
    os.chdir(outdir)
    reader.read_psf(filename=psf_path,
                    namdbinfile=bin_path,
                    velnamdbinfile=bin_path)

    assert reader.get_topologies() == topologies
    assert reader.get_segids() == ["P0", "P1"]

    # Same file was read for both, so the values have to agree
    coords = reader.get_coordinates(segid="P0", resid=1)
    velocities = reader.get_velocities(segid="P0", resid=1)
    assert coords == velocities
def test_query():
    """
    Exercises the query functions on a small two segment system
    """
    from psfgen import PsfGen

    topologies = ["top_all36_caps.rtf", "top_all36_prot.rtf"]
    segments = [
        ("P0", "psf_protein_P0.pdb"),
        ("P1", "psf_protein_P1.pdb"),
    ]

    gen = PsfGen(output=os.devnull)
    os.chdir(dir)
    for topfile in topologies:
        gen.read_topology(topfile)
    for segid, pdbfile in segments:
        gen.add_segment(segid=segid, pdbfile=pdbfile)
        gen.read_coords(segid=segid, filename=pdbfile)
    gen.patch(patchname="DISU", targets=[("P0", "10"), ("P0", "15")])

    assert gen.get_topologies() == topologies

    # Residue types known from the topologies
    known_residues = gen.get_residue_types()
    assert len(known_residues) == 26
    assert "CYS" in known_residues
    assert "TIP3" not in known_residues

    # Patches known from the topologies
    known_patches = gen.get_patches(list_all=True)
    assert len(known_patches) == 25
    assert "CYSD" in known_patches
    assert "SEP" not in known_patches

    # Segment names
    assert gen.get_segids() == ["P0", "P1"]

    # Residue IDs come back as strings
    assert gen.get_resids("P0") == list(map(str, range(1, 26)))
    assert gen.get_resids("P1") == list(map(str, range(0, 31)))

    # Residue name lookup accepts the resid as str or int
    assert gen.get_resname(segid="P0", resid="2") == "LEU"
    assert gen.get_resname(segid="P1", resid=29) == "SER"

    # Patches that were actually applied
    assert gen.get_patches() == [("DISU", "P0", "10"), ("DISU", "P0", "15")]
    assert gen.get_first(segid="P0") is None
    assert gen.get_last(segid="P1") is None

    # Per-atom queries
    cys_atoms = ['N', 'HN', 'CA', 'HA', 'CB', 'HB1', 'HB2', 'SG', 'C', 'O']
    assert gen.get_atom_names(segid="P0", resid="10") == cys_atoms
    assert set(gen.get_masses(segid="P0", resid=1)) == {1.008, 12.011, 15.999}
    assert gen.get_atom_indices(segid="P1", resid=0) == list(range(1, 7))
    cys_charges = {-0.47, 0.07, 0.09, 0.31, -0.1, -0.08, 0.51, -0.51}
    assert set(gen.get_charges(segid="P0", resid="10")) == cys_charges

    # Coordinates and velocities
    assert len(gen.get_coordinates(segid="P1", resid=25)) == 17
    assert set(gen.get_velocities(segid="P1", resid=1)) == {(0., 0., 0.)}
def test_single_chain(tmpdir):
    """
    Builds a simple but realistic system: protein, waters and ions
    """
    from psfgen import PsfGen

    outdir = str(tmpdir.mkdir("single_chain"))
    topologies = (
        "top_all36_caps.rtf",
        "top_all36_prot.rtf",
        "top_water_ions.rtf",
    )
    # Loaded in this order: protein, then waters (split with 10k atoms per
    # file to avoid PDB limitations), then ions
    segments = (
        ("P0", "psf_protein_P0.pdb"),
        ("P1", "psf_protein_P1.pdb"),
        ("W0", "psf_wat_0.pdb"),
        ("W1", "psf_wat_1.pdb"),
        ("I", "psf_ions.pdb"),
    )
    disulfides = (
        [("P0", "10"), ("P0", "15")],
        [("P0", "24"), ("P1", "23")],
        [("P0", "11"), ("P1", "11")],
    )

    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    for topfile in topologies:
        gen.read_topology(topfile)

    for segid, pdbfile in segments:
        gen.add_segment(segid=segid, pdbfile=pdbfile)
        gen.read_coords(segid=segid, filename=pdbfile)

    for bridge in disulfides:
        gen.patch(patchname="DISU", targets=bridge)

    # Regenerate
    gen.regenerate_angles()
    gen.regenerate_dihedrals()

    # Write into the temporary directory
    os.chdir(outdir)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Reload the result with vmd-python and validate it
    molid = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    check_correctness(molid)
    molecule.delete(molid)
def test_case_sensitivity():
    """
    Checks the case sensitivity setting. Two objects are needed since the
    setting can't be changed once topology files have been read.
    """
    from psfgen import PsfGen

    topfile = "top_casesensitive.rtf"
    os.chdir(dir)

    # Case sensitive: both spellings are kept as distinct residues
    sensitive = PsfGen(case_sensitive=True, output=os.devnull)
    sensitive.read_topology(topfile)
    assert sensitive.get_residue_types() == ["ACE", "Ace"]

    # Too late to flip the setting now that a topology is loaded
    with pytest.raises(ValueError):
        sensitive.case_sensitive = False
    del sensitive

    # Flipping it before any topology is read is allowed
    insensitive = PsfGen(case_sensitive=True, output=os.devnull)
    insensitive.case_sensitive = False
    insensitive.read_topology(topfile)
    assert insensitive.get_residue_types() == ["ACE"]
def test_delete():
    """
    Checks removal of a single atom, a whole residue and a whole segment
    """
    from psfgen import PsfGen

    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)
    gen.add_segment(segid="P0", pdbfile="psf_protein_P0.pdb")
    gen.read_coords(segid="P0", filename="psf_protein_P0.pdb")

    # One atom
    assert "CAY" in gen.get_atom_names(segid="P0", resid=1)
    gen.delete_atoms(segid="P0", resid=1, atomname="CAY")
    assert "CAY" not in gen.get_atom_names(segid="P0", resid=1)

    # One residue, here a capping group: its resid disappears
    all_resids = [str(i) for i in range(1, 26)]
    assert gen.get_resids("P0") == all_resids
    gen.delete_atoms(segid="P0", resid=1)
    assert gen.get_resids("P0") == all_resids[1:]

    # One segment, added just to be removed again
    gen.add_segment(segid="DELETE", pdbfile="psf_protein_P1.pdb")
    assert gen.get_segids() == ["P0", "DELETE"]
    gen.delete_atoms(segid="DELETE")
    assert gen.get_segids() == ["P0"]
def test_mutation(tmpdir):
    """
    Mutates L2A in chain 0, which as a side effect also exercises
    coordinate guessing
    """
    from psfgen import PsfGen

    outdir = str(tmpdir.mkdir("mutation"))
    protein_pdb = "psf_protein_P0.pdb"

    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    gen.add_segment(segid="P0", pdbfile=protein_pdb,
                    mutate=[("2", "ALA")])
    gen.read_coords(segid="P0", filename=protein_pdb)
    gen.patch(patchname="DISU", targets=[("P0", "10"), ("P0", "15")])

    # Atoms of the new ALA need guessed coordinates
    gen.guess_coords()

    # Pin one atom to a known position
    gen.set_position(segid="P0", resid="2", atomname="HB1",
                     position=(1.0, 2.0, 3.0))

    # Regenerate
    gen.regenerate_angles()
    gen.regenerate_dihedrals()

    # Write into the temporary directory
    os.chdir(outdir)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Inspect the output with vmd-python
    molid = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    assert len(set(atomsel("protein").fragment)) == 1
    assert len(set(atomsel("resname ACE NMA NME").residue)) == 2

    # Resid 2 is now ALA rather than LEU
    assert set(atomsel("resid 2").resname) == {"ALA"}

    # The guessed HB3 was moved away from the origin
    guessed = "resid 2 and name HB3"
    assert atomsel(guessed).x != [0.0]
    assert atomsel(guessed).y != [0.0]
    assert atomsel(guessed).z != [0.0]

    # The manually placed HB1 sits exactly where it was put
    placed = "resid 2 and name HB1"
    assert atomsel(placed).x == [1.0]
    assert atomsel(placed).y == [2.0]
    assert atomsel(placed).z == [3.0]
    molecule.delete(molid)
def test_ends(tmpdir):
    """
    Tests adding patches to the beginning and end, as well as adding
    residues in the segment
    """
    from psfgen import PsfGen

    p = str(tmpdir.mkdir("mutation"))
    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    gen.read_topology("top_all36_prot.rtf")

    # Add neutral N-terminus
    # Add an alanine then a protonated glutamate at the C-terminus.
    gen.add_segment(segid="P", pdbfile="protein_nocaps.pdb",
                    first="NTER", last="GLUP",
                    residues=[("25", "ALA"), ("26", "GLU")])

    # Set coordinates and guess the ones missing for the added atoms
    gen.read_coords(segid="P", filename="protein_nocaps.pdb")
    gen.guess_coords()

    # Check internal state
    assert gen.get_resids("P") == [str(_) for _ in range(2, 27)]
    assert gen.get_resname(segid="P", resid=25) == "ALA"
    assert gen.get_patches(list_defaults=True) \
        == [('GLUP', 'P', '26'), ('NTER', 'P', '2')]
    assert gen.get_first(segid="P") == "NTER"
    assert gen.get_last(segid="P") == "GLUP"

    # Output
    os.chdir(p)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Check all resids are present and that 2 extra ones were added.
    # Sets have no defined iteration order, so sort the unique resids
    # before comparing against the expected ascending range.
    m = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    assert sorted(set(atomsel("all").resid)) == list(range(2, 27))
    assert len(atomsel("all")) == 382
    assert set(atomsel("resid 25").resname) == {"ALA"}

    # Check patches were applied correctly
    assert "HT1" in atomsel("resid 2").name
    assert "HN" not in atomsel("resid 2").name
    assert "HE2" in atomsel("resid 26").name

    # Check all coordinates are set
    assert 0.0 not in atomsel("all").x
    assert 0.0 not in atomsel("all").y
    assert 0.0 not in atomsel("all").z
    molecule.delete(m)
def test_alias():
    """
    Checks atom and residue aliases, either at the topology or the PDB
    level
    """
    from psfgen import PsfGen

    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    # Map the nonstandard PDB names onto the topology names
    residue_aliases = (("LEU", "LEX"), ("ARG", "AAA"))
    for top_name, pdb_name in residue_aliases:
        gen.alias_residue(top_resname=top_name, pdb_resname=pdb_name)
    gen.alias_atom(top_atomname="N", pdb_atomname="NOOO", resname="PHE")

    gen.add_segment(segid="P", pdbfile="protein_newnames.pdb")
    gen.read_coords(segid="P", filename="psf_protein_P0.pdb")

    # The segment ends up with topology names throughout
    assert gen.get_resname(segid="P", resid=2) == "LEU"
    assert gen.get_resname(segid="P", resid=5) == "ALA"
    assert "N" in gen.get_atom_names(segid="P", resid=23)
def test_set():
    """
    Checks each setter by reading the changed value back
    """
    from psfgen import PsfGen

    protein_pdb = "psf_protein_P1.pdb"

    os.chdir(dir)
    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)
    gen.add_segment(segid="P", pdbfile=protein_pdb)
    assert gen.get_segids() == ["P"]

    # Segment name
    gen.set_segid(segid="P", new_segid="P1")
    assert gen.get_segids() == ["P1"]
    gen.read_coords(segid="P1", filename=protein_pdb)

    # Residue name
    assert gen.get_resname(segid="P1", resid="1") == "ASP"
    gen.set_resname(segid="P1", resid="1", new_resname="ASH")
    assert gen.get_resname(segid="P1", resid="1") == "ASH"

    # Charge
    gen.set_charge(segid="P1", resid="1", atomname="O", charge=-1.)
    assert -1.0 in gen.get_charges(segid="P1", resid="1")

    # Atom name
    gen.set_atom_name(segid="P1", resid="1", atomname="N",
                      new_atomname="NO")
    names_after = gen.get_atom_names(segid="P1", resid="1")
    assert "N" not in names_after
    assert "NO" in gen.get_atom_names(segid="P1", resid="1")

    # Coordinate
    new_position = (0., 0., -1.)
    gen.set_position(segid="P1", resid="1", atomname="HN",
                     position=new_position)
    assert new_position in gen.get_coordinates(segid="P1", resid="1")

    # Velocity, addressed through the renamed atom
    new_velocity = (5., 5., 3.)
    gen.set_velocity(segid="P1", resid="1", atomname="NO",
                     velocity=new_velocity)
    assert new_velocity in gen.get_velocities(segid="P1", resid="1")
class CharmmWriter(MoleculeWriter): """ An object that handles all the conversions to a psf file by interfacing with psfgen. Writes a pdb/psf file pair from the current molecule using the CHARMM36 topology and atom names/types. Interfaces with psfgen by dynamically generating the .tcl file that psfgen takes as input. Prompts the user for additional topology files and helps with matching atom names that cannot be automatically translated to the charmm naming conventions. """ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # CONSTANTS # #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ WATER_NAMES = { "tip3": "TIP3", "tip4e": "TP4E", "spce": "SPCE", } WATER_O_NAME = "OH2" WATER_H_NAMES = ["H1", "H2"] #========================================================================== def __init__(self, molid, **kwargs): """ Creates a CHARMM writer Args: molid (int): VMD molecule ID of system to write tmp_dir (str): Directory for temporary files. Defaults to "." lipid_sel (str): Lipid selection string. Defaults to "lipid" hmr (bool): If hydrogen masses should be repartitioned. Defaults to False forcefield (str): Forcefield to use, either "charmm" or "amber" water_model (str): Water model to use extra_topos (list of str): Additional topology (.str, .off, .lib) to include. extra_params (list of str): Additional parameter sets (.str, .frcmod) override_defaults (bool): If set, omits default forcefield parameters. debug_verbose (bool): Prints additional output, like from psfgen. 
""" # Initialize default options super(CharmmWriter, self).__init__(molid, **kwargs) # Create a psf generator object self.psfgen = PsfGen() # Set forcefield default topologies and parameters self.forcefield = kwargs.get("forcefield", "charmm") self.water_model = kwargs.get("water_model", "tip3") self.topologies = self.get_topologies(self.forcefield, self.water_model) self.parameters = self.get_parameters(self.forcefield, self.water_model) if "charmm" in self.forcefield: if self.hmr: raise DabbleError("HMR not supported with CHARMM ff yet") # Handle override and extra topologies if self.override: self.topologies = [] self.parameters = [] # Now extra topologies (put in self by super __init__) self.topologies.extend(self.extra_topos) self.parameters.extend(self.extra_params) # Once all topologies defined, initialize matcher only if # using CHARMM topologies (not if we're doing a conversion) if "charmm" in self.forcefield or "opls" in self.forcefield: self.matcher = CharmmMatcher(self.topologies) # Keep track of segment numbers for protein and other self.segint = 0 #========================================================================= def write(self, filename): """ Writes the parameter and topology files Args: filename (str): File name to write. File type suffix will be added. 
""" self.outprefix = filename # Put our molecule on top old_top = molecule.get_top() molecule.set_top(self.molid) # Amber forcefield done with AmberWriter then conversion if "amber" in self.forcefield: # Avoid circular import by doing it here from dabble.param import AmberWriter prmtopgen = AmberWriter(molid=self.molid, tmp_dir=self.tmp_dir, forcefield=self.forcefield, water_model=self.water_model, hmr=self.hmr, lipid_sel=self.lipid_sel, extra_topos=self.extra_topos, extra_params=self.extra_params, override_defaults=self.override, debug_verbose=self.debug) prmtopgen.write(self.outprefix) self._prmtop_to_charmm() # Charmm forcefield elif "charmm" in self.forcefield: self._run_psfgen() # OPLS forcefield. Same as charmm but list separately for readability elif "opls" in self.forcefield: self._run_psfgen() else: raise DabbleError("Unsupported forcefield '%s' for CharmmWriter" % self.forcefield) # Check output and finish up self._check_psf_output() # Reset top molecule molecule.set_top(old_top) #========================================================================= # Static methods # #========================================================================= @classmethod def get_topologies(cls, forcefield, water_model): if forcefield == "charmm": topos = [ "top_all36_caps.rtf", "top_all36_cgenff.rtf", "top_all36_prot.rtf", "top_all36_lipid.rtf", "top_all36_carb.rtf", "top_all36_na.rtf", "toppar_all36_prot_na_combined.str", "toppar_all36_prot_fluoro_alkanes.str" ] if water_model == "tip3": topos.append("toppar_water_ions.str") elif water_model == "tip4e": topos.append("toppar_water_ions_tip4p_ew.str") elif water_model == "spce": topos.append("toppar_water_ions_spc_e.str") elif forcefield == "opls": topos = ["opls_aam.rtf", "opls_aam_caps.rtf"] if water_model != "tip3": raise DabbleError("Only TIP3 water model supported for OPLS") elif forcefield == "amber": from dabble.param import AmberWriter # avoid circular dependency return AmberWriter.get_topologies(forcefield, 
water_model) else: raise ValueError("Invalid forcefield: '%s'" % forcefield) return [cls._get_forcefield_path(top) for top in topos] #========================================================================= @classmethod def get_parameters(cls, forcefield, water_model): if forcefield == "charmm": prms = [ "par_all36m_prot.prm", "par_all36_cgenff.prm", "par_all36_lipid.prm", "par_all36_carb.prm", "par_all36_na.prm", "toppar_all36_prot_na_combined.str" ] if water_model == "tip3": prms.append("toppar_water_ions.str") elif water_model == "tip4e": prms.append("toppar_water_ions_tip4p_ew.str") elif water_model == "spce": prms.append("toppar_water_ions_spc_e.str") elif forcefield == "amber": from dabble.param import AmberWriter # avoid circular dependency return AmberWriter.get_parameters(forcefield, water_model) elif forcefield == "opls": prms = ["opls_aam.prm"] if water_model != "tip3": raise DabbleError("Only TIP3 water model supported for OPLS") else: raise ValueError("Invalid forcefield: '%s'" % forcefield) return [cls._get_forcefield_path(par) for par in prms] #========================================================================= # Private methods # #========================================================================= def _write_water_blocks(self): """ Writes a lot of temporary files with 10000 waters each, to bypass psfgen being stupid with files containing more than 10000 of a residue. """ # Set water names and write them to PDB file(s) self._set_water_names() pdbs = self._write_water_pdbs() for i, pdb in enumerate(pdbs): self.psfgen.add_segment(segid="W%d" % i, pdbfile=pdb) self.psfgen.read_coords(segid="W%d" % i, filename=pdb) # If water model includes dummy atoms, guess the coordinates # This is safe as only waters have been added to the psfgen state # so far, so actually broken atoms won't be fixed on accident. 
if self.water_model != "tip3": self.psfgen.guess_coords() self.psfgen.regenerate_angles() self.psfgen.regenerate_dihedrals() #========================================================================== def _write_lipid_blocks(self): """ Writes a temporary PDB file containing the lipids for later use by psfgen. Renumbers the lipid residues because some can have **** instead of an integer for resid in large systems, which will crash psfgen. Also sets atom names for some common lipids (currently POPC) Raises: NotImplementedError if more than 10,000 lipids are present since it doesn't support feeding multiple lipid blocks to psfgen currently NotImplementedError if lipid other than POPC,POPE,POPG is found """ # Put current molecule on top to simplify atom selection old_top = molecule.get_top() molecule.set_top(self.molid) # Collect lipid residues up alll = atomsel('(%s) and user 1.0' % self.lipid_sel) residues = list(set(alll.residue)) # Lipids not compatible with AMBER parameters, CHARMM format if alll and ("amber" in self.forcefield or "opls" in self.forcefield): raise ValueError( "AMBER or OPLS parameters not supported for lipids" " in CHARMM output format") # Sanity check for < 10k lipids if len(residues) >= 10000: raise NotImplementedError("More than 10k lipids found") # Loop through all residues and renumber and correctly name them lipress = [] for resname in set(alll.resname): lipress.extend(self._rename_by_resname(resname, renumber=True)) # Write temporary lipid pdb _, temp = tempfile.mkstemp(suffix='.pdb', prefix='psf_lipid_', dir=self.tmp_dir) os.close(_) saved_lips = atomsel("residue %s" % ' '.join(str(_) for _ in lipress)) saved_lips.user = 0.0 saved_lips.write('pdb', temp) # Generate lipid segment self.psfgen.add_segment(segid="L", pdbfile=temp) self.psfgen.read_coords(segid="L", filename=temp) # Put old top back molecule.set_top(old_top) #========================================================================== def _write_ion_blocks(self): """ Writes a PDB 
file containing correctly named ions for use by psfgen, and instructs psfgen to use it in TCL code. """ # Put our molecule on top to simplify atom selection language old_top = molecule.get_top() molecule.set_top(self.molid) # Select all ions allions = [] for resname in set(atomsel("numbonds 0").resname): allions.extend(self._rename_by_resname(resname, renumber=True)) # Stop if no ions were found if not allions: return # Save ions as pdb allsel = atomsel("residue %s" % " ".join(str(_) for _ in allions)) allsel.resid = range(len(allsel)) allsel.user = 0.0 _, temp = tempfile.mkstemp(suffix=".pdb", prefix="psf_ions_", dir=self.tmp_dir) os.close(_) allsel.write("pdb", temp) self.psfgen.add_segment(segid="I", pdbfile=temp) self.psfgen.read_coords(segid="I", filename=temp) molecule.set_top(old_top) #========================================================================== def _find_single_residue_names(self, resname, molid): """ Uses graph matcher and available topologies to match up ligand names automatically. Tries to use graphs, and if there's an uneven number of atoms tries to match manually to suggest which atoms are most likely missing. Args: resname (str): Residue name of the ligand that will be written. All ligands will be checked separately against the graphs. molid (int): VMD molecule ID to consider Returns: (list of ints): Residue numbers (not resid) of all input ligands that were successfully matched. Need to do it this way since residue names can be changed in here to different things. Raises: ValueError if number of resids does not match number of residues as interpreted by VMD NotImplementedError if a residue could not be matched to a graph. """ # Put our molecule on top old_top = molecule.get_top() molecule.set_top(molid) # Sanity check that there is no discrepancy between defined resids and # residues as interpreted by VMD. 
residues = set(atomsel("user 1.0 and resname '%s'" % resname).residue) for chain in set(atomsel("user 1.0 and resname '%s'" % resname).chain): tempres = set( atomsel("user 1.0 and resname '%s' and chain %s" % (resname, chain)).residue) resids = set( atomsel("user 1.0 and resname '%s' and chain %s" % (resname, chain)).resid) if len(tempres) != len(resids): raise DabbleError("VMD found %d residues for resname '%s', " "but there are %d resids in chain %s! " "Check input." % (len(tempres), resname, len(resids), chain)) for residue in residues: sel = atomsel("residue %s and resname '%s' and user 1.0" % (residue, resname)) newname, atomnames = self.matcher.get_names(sel, print_warning=True) if not newname: resname, patch, atomnames = self.matcher.get_patches(sel) if not newname: print( "ERROR: Could not find a residue definition for %s:%s" % (resname, residue)) raise NotImplementedError( "No residue definition for %s:%s" % (resname, residue)) print("\tApplying patch %s to ligand %s" % (patch, newname)) # Do the renaming self._apply_naming_dictionary(atomnames=atomnames, resnames=newname, verbose=True) molecule.set_top(old_top) return list(residues) #========================================================================== def _write_generic_block(self, residues): """ Matches ligands to available topology file, renames atoms, and then writes temporary files for the ligands Args: residues (list of int): Residue numbers to be written. Will all be written to one segment. 
Returns: True if successful """ # Put our molecule on top to simplify atom selection language old_top = molecule.get_top() molecule.set_top(self.molid) alig = atomsel('user 1.0 and residue %s' % " ".join([str(x) for x in residues])) # Write temporary file containg the residues and update tcl commands _, temp = tempfile.mkstemp(suffix='.pdb', prefix='psf_block_', dir=self.tmp_dir) os.close(_) alig.write('pdb', temp) alig.user = 0.0 # Get next available segment name segname = "B%d" % self.segint self.segint += 1 self.psfgen.add_segment(segid=segname, pdbfile=temp) self.psfgen.read_coords(segid=segname, filename=temp) if old_top != -1: molecule.set_top(old_top) return True #========================================================================== def _write_protein_blocks(self, molid, frag): """ Writes a protein fragment to a pdb file for input to psfgen Automatically assigns amino acid names Args: molid (int): VMD molecule ID of renumbered protein frag (str): Fragment to write Returns: (list of Patches): Patches to add to psfgen input files """ print("Setting protein atom names") # Put our molecule on top to simplify atom selection language old_top = molecule.get_top() molecule.set_top(molid) patches = set() extpatches = set() # Get a unique and reliabe segment name seg = self.matcher.get_protein_segname(molid, frag) fragsel = atomsel("fragment '%s'" % frag) residues = list(set(fragsel.residue)) for residue in residues: sel = atomsel('residue %s' % residue) resid = sel.resid[0] # Only try to match single amino acid if there are 1 or 2 bonds if len(self.matcher.get_extraresidue_atoms(sel)) < 3: (newname, atomnames) = self.matcher.get_names(sel, False) # See if it's a disulfide bond participant else: (newname, patch, atomnames) = \ self.matcher.get_disulfide("residue %d" % residue, molid) if newname: extpatches.add(patch) # Couldn't find a match. 
See if it's a patched residue if not newname: (newname, patchname, atomnames) = self.matcher.get_patches(sel) if newname: # This returns patch name only, not a Patch object patches.add( Patch(name=patchname, segids=[seg], resids=[resid])) # Fall through to error condition if not newname: raise DabbleError("Couldn't find a patch for %s:%s" % (sel.resname[0], resid)) # Do the renaming self._apply_naming_dictionary(atomnames=atomnames, resnames=newname) # Save protein chain in the correct order filename = self.tmp_dir + '/psf_protein_%s.pdb' % seg _write_ordered_pdb(filename, "fragment '%s'" % frag, molid) print("\tWrote %d atoms to the protein segment %s" % (len(atomsel("fragment %s" % frag)), seg)) # Now invoke psfgen for the protein segments self.psfgen.add_segment(segid=seg, pdbfile=filename) print("Applying the following single-residue patches to P%s:\n" % frag) print("\t%s" % "\t".join(str(_) for _ in patches)) for p in patches: self.psfgen.patch(patchname=p.name, targets=p.targets()) self.psfgen.read_coords(segid=seg, filename=filename) # Fix coordinates that are out of bounds, ie 5 characters badidxs = atomsel( "fragment '%s' and (abs(x) >= 100 or abs(y) >= 100 " "or abs(z) >= 100)" % frag, molid).index for idx in badidxs: atom = atomsel("index %d" % idx, molid) self.psfgen.set_position(segid=seg, resid=atom.resid[0], atomname=atom.name[0], position=(atom.x[0], atom.y[0], atom.z[0])) if old_top != -1: molecule.set_top(old_top) fragsel.user = 0.0 return extpatches #========================================================================== def _check_psf_output(self): """ Scans the output psf from psfgen for atoms where the coordinate could not be set, indicating an unmatched atom. This check is necessary because sometimes psfgen will run with no errors or warnings but will have unmatched atoms that are all at (0,0,0). 
""" # Check file was written at all if not os.path.isfile('%s.pdb' % self.outprefix): raise DabbleError("\nERROR: psf file failed to write.\n" " Please see log above.\n") # Open the pdb file in VMD and check for atoms with no occupancy fileh = molecule.load('pdb', '%s.pdb' % self.outprefix) errors = atomsel("occupancy=-1", molid=fileh) # Print out error messages if errors: errstr = "\nERROR: Couldn't find the following atoms.\n" for i in range(len(errors)): errstr += "\t%s%s:%s\n" % (errors.resname[i], errors.resid[i], errors.name[i]) errstr += "Check if they are present in the original structure.\n" raise DabbleError(errstr) print("\nChecked output pdb/psf has all atoms present " "and correct.\n") #========================================================================== def _find_residue_in_rtf(self, resname, molid): """ Scans the input topology files to find a name match for the given residue name, then pulls out the atoms involved and checks that they are all present in the input coordinates, prompting the user to correct the names of atoms that could not be matched. Residue ID is used because there can be multiple copies of a residue with the same name, but only one has missing or extra atoms. 
Args: resname (str): Residue name to check molid (int): VMD molecule ID Returns: True if all matching was successful False if the residue name cannot be found """ print("Finding residue name '%s'" % resname) for top in self.topologies: topfile = open(top, 'r') topo_atoms = _get_atoms_from_rtf(text=topfile.readlines(), resname=resname) # Use first definition found of this residue if topo_atoms: break topfile.close() if not topo_atoms: return False print("Successfully found residue %s in input topologies" % resname) # Match up atoms with python sets pdb_atoms = set( atomsel("resname '%s' and user 1.0" % resname, molid=molid).name) pdb_only = pdb_atoms - topo_atoms topo_only = topo_atoms - pdb_atoms # If uneven number of atoms, there are missing or additional atoms if len(pdb_atoms) > len(topo_atoms): raise DabbleError( "\nERROR: Cannot process modified residue %s.\n" "There are %d extra atoms in the input structure " "that are undefined in the topology file. The " "following atoms could not be matched and may " "either be misnamed, or additional atoms:\n" "[ %s ]\n" % (resname, len(pdb_atoms) - len(topo_atoms), " ".join(pdb_only))) if len(topo_atoms) > len(pdb_atoms): raise DabbleError( "\nERROR: Cannot process modified residue %s.\n" "There are %d missing atoms in the input structure " "that are defined in the topology file. The " "following atoms could not be matched and may " "either be misnamed or deleted atoms:\n" "[ %s ]\n" % (resname, len(topo_atoms) - len(pdb_atoms), " ".join(topo_only))) # Offer to rename atoms that couldn't be matched to the topology if pdb_only: print("\nWARNING: Having some trouble with modified residue %s.\n" " The following atom names cannot be matched up " " to the input topologies. 
They are probably " " misnamed.\n" % resname) print(" To help you, here are the atom names that " " should be present according to the topology " " but were not found:\n") print(" [ %s ]\n" % ' '.join([str(t) for t in topo_only])) print(" Please enter a valid name for each atom as " "it appears or CTRL+D to quit..\n") for unmatched in pdb_only: print("Unmatched topology names: [ %s ]" % ' '.join(topo_only)) newname = input(" %s -> " % unmatched) while newname not in topo_only: print("'%s' is not an available name in the topology." "Please try again.\n" % newname) newname = input(" %s -> " % unmatched) atomsel("resname '%s' and user 1.0 and name '%s'" % (resname, unmatched)).name = newname pdb_atoms = set( atomsel("resname '%s' and user 1.0" % resname).name) topo_only = topo_atoms - pdb_atoms resname = newname # Recurse to check that everything is assigned correctly self._find_residue_in_rtf(resname, molid) print("Matched up all atom names for resname '%s'\n" % resname) return True #========================================================================== def _get_patch(self, seg, resid): """ Prompts the user for a patch to apply for the given residue. 
Gathers available patches from topology files Args: seg (str): Segment to apply the patch to resid (int): Residue ID to apply the patch to Returns: (str) patch line to put in the psfgen input file """ avail_patches = self._get_avail_patches() print("What is the patch name I should apply?") print("Type NONE for no patch, if your residue is completely " "defined in a str file") print("Or type HELP for a list of all patches I know about") patchname = input("> ") if patchname == "HELP": print(" PATCH COMMENT") print(" ----- -------") for patch in avail_patches: print("%7s %s" % (patch, avail_patches[patch])) patchname = input("> ") while (patchname not in avail_patches) and (patchname != "NONE"): print("I don't know about patch %s" % patchname) patchname = input("Try again > ") if patchname == "NONE": return "" return "patch %s %s:%d\n" % (patchname, seg, resid) #========================================================================== def _get_avail_patches(self): """ Gathers the patches defined in all topology files. 
Returns: (dict str -> str): Patch names as keys, comment as value """ avail_patches = {} for top in self.topologies: topfile = open(top, 'r') for line in topfile: tokens = line.split() if not tokens: continue if tokens[0] == "PRES": comment = ' '.join(tokens[tokens.index("!") + 1:]) avail_patches[tokens[1]] = comment return avail_patches #========================================================================== def _run_psfgen(self): # Read topology files in to psfgen print("Using the following topologies:") for top in self.topologies: print(" - %s" % os.path.split(top)[1]) self.psfgen.read_topology(top) # Mark all atoms as unsaved with the user field atomsel('all', molid=self.molid).user = 1.0 check_atom_names(molid=self.molid) # Save water 10k molecules at a time if atomsel('water', molid=self.molid): self._write_water_blocks() # Now ions if present, changing the atom names if atomsel('ions', molid=self.molid): self._write_ion_blocks() # Now lipid if atomsel(self.lipid_sel): self._write_lipid_blocks() # Now handle the protein # Save and reload the protein so residue looping is correct if atomsel("resname %s" % PATCHABLE_ACIDS, molid=self.molid): extpatches = set() for frag in sorted( set( atomsel("resname %s" % PATCHABLE_ACIDS, molid=self.molid).fragment)): extpatches.update(self._write_protein_blocks(self.molid, frag)) # List all patches applied to the protein print("Applying the following patches:\n") print("\t%s" % "\n\t".join(str(_) for _ in extpatches)) # Apply all multi segment patches to the protein for p in extpatches: self.psfgen.patch(p.name, p.targets()) else: print("\n\tDidn't find any protein. Continuing...\n") # Regenerate angles and dihedrals after applying patches # Angles must be regenerated FIRST! 
# See http://www.ks.uiuc.edu/Research/namd/mailing_list/namd-l.2009-2010/4137.html self.psfgen.regenerate_angles() self.psfgen.regenerate_dihedrals() # Check if there is anything else and let the user know about it leftovers = atomsel('user 1.0', molid=self.molid) for lig in set(leftovers.resname): residues = self._find_single_residue_names(resname=lig, molid=self.molid) self._write_generic_block(residues) # Write the output files and run self.psfgen.write_psf(filename="%s.psf" % self.outprefix, type="x-plor") self.psfgen.write_pdb(filename="%s.pdb" % self.outprefix) #========================================================================== def _prmtop_to_charmm(self): """ Converts an AMBER prmtop with AMBER parameters to a psf file, using ParmEd. """ # Save PSF topology and parameter file parmstruct = load_file(self.outprefix + ".prmtop", xyz=self.outprefix + ".inpcrd", structure=True) parmstruct.save(self.outprefix + ".psf", format="psf") # Save PDB file with coordinates m = molecule.load("parm7", self.outprefix + ".prmtop", "rst7", self.outprefix + ".inpcrd") atomsel("all", m).write("pdb", self.outprefix + ".pdb") molecule.delete(m)