Пример #1
0
    def __init__(self, molid, **kwargs):
        """
        Sets up a writer producing CHARMM-format output for a molecule

        Args:
            molid (int): VMD molecule ID of system to write
            tmp_dir (str): Directory for temporary files. Defaults to "."
            lipid_sel (str): Lipid selection string. Defaults to "lipid"
            hmr (bool): If hydrogen masses should be repartitioned. Defaults
                to False
            forcefield (str): Forcefield to use. Defaults to "charmm"
            water_model (str): Water model to use. Defaults to "tip3"
            extra_topos (list of str): Additional topology (.str, .off, .lib) to
                include.
            extra_params (list of str): Additional parameter sets (.str, .frcmod)
            override_defaults (bool): If set, omits default forcefield parameters.
            debug_verbose (bool): Prints additional output, like from psfgen.

        Raises:
            DabbleError: if hmr is requested together with a CHARMM forcefield
        """

        # The parent class handles the options shared by all writers
        super(CharmmWriter, self).__init__(molid, **kwargs)

        # Handle to the psfgen library
        self.psfgen = PsfGen()

        # Forcefield and water model, with their default file sets
        self.forcefield = kwargs.get("forcefield", "charmm")
        self.water_model = kwargs.get("water_model", "tip3")

        default_topologies = self.get_topologies(self.forcefield,
                                                 self.water_model)
        default_parameters = self.get_parameters(self.forcefield,
                                                 self.water_model)
        self.topologies = default_topologies
        self.parameters = default_parameters

        if "charmm" in self.forcefield and self.hmr:
            raise DabbleError("HMR not supported with CHARMM ff yet")

        # An override discards the defaults computed above
        if self.override:
            self.topologies, self.parameters = [], []

        # User-supplied extras (stored on self by the parent __init__)
        self.topologies.extend(self.extra_topos)
        self.parameters.extend(self.extra_params)

        # The matcher is only built for forcefields that use CHARMM-style
        # topologies directly, not when a conversion is performed
        if any(name in self.forcefield for name in ("charmm", "opls")):
            self.matcher = CharmmMatcher(self.topologies)

        # Counter used to number generated segments
        self.segint = 0
Пример #2
0
def test_formats(tmpdir):
    """
    Round-trips a two-segment system through psf and namdbin files
    """

    from psfgen import PsfGen
    outdir = str(tmpdir.mkdir("formats"))
    os.chdir(dir)

    topologies = ["top_all36_caps.rtf", "top_all36_prot.rtf"]
    segids = ["P0", "P1"]
    psf_path = os.path.join(outdir, "pdbin.psf")
    bin_path = os.path.join(outdir, "pdbin.bin")

    writer = PsfGen(output=os.devnull)
    for topfile in topologies:
        writer.read_topology(topfile)

    for segid in segids:
        pdbfile = "psf_protein_%s.pdb" % segid
        writer.add_segment(segid=segid, pdbfile=pdbfile)
        writer.read_coords(segid=segid, filename=pdbfile)

    # Dump the system both as a PSF and as a NAMD binary file
    writer.write_psf(filename=psf_path)
    writer.write_namdbin(filename=bin_path)
    del writer

    # Load both files into a fresh object. The same binary file is passed
    # as coordinates and as velocities so the velocity reader is exercised
    # too; the topology list is expected to come back from the PSF.
    reader = PsfGen(output=os.devnull)
    os.chdir(outdir)
    reader.read_psf(filename=psf_path,
                    namdbinfile=bin_path,
                    velnamdbinfile=bin_path)
    assert reader.get_topologies() == topologies
    assert reader.get_segids() == segids

    coords = reader.get_coordinates(segid="P0", resid=1)
    assert coords == reader.get_velocities(segid="P0", resid=1)
Пример #3
0
def test_query():
    """
    Checks each read-only query method against a known two-chain system
    """

    from psfgen import PsfGen
    gen = PsfGen(output=os.devnull)
    os.chdir(dir)

    topologies = ["top_all36_caps.rtf", "top_all36_prot.rtf"]
    for topfile in topologies:
        gen.read_topology(topfile)

    for segid in ("P0", "P1"):
        pdbfile = "psf_protein_%s.pdb" % segid
        gen.add_segment(segid=segid, pdbfile=pdbfile)
        gen.read_coords(segid=segid, filename=pdbfile)

    gen.patch(patchname="DISU", targets=[("P0", "10"), ("P0", "15")])

    assert gen.get_topologies() == topologies

    # Residue types known from the topology files
    residue_types = gen.get_residue_types()
    assert len(residue_types) == 26
    assert "CYS" in residue_types
    assert "TIP3" not in residue_types

    # Patches known from the topology files
    known_patches = gen.get_patches(list_all=True)
    assert len(known_patches) == 25
    assert "CYSD" in known_patches
    assert "SEP" not in known_patches

    # Segment names
    assert gen.get_segids() == ["P0", "P1"]

    # Residue IDs come back as strings
    assert gen.get_resids("P0") == list(map(str, range(1, 26)))
    assert gen.get_resids("P1") == list(map(str, range(0, 31)))

    # Residue name lookup accepts the resid as str or int
    assert gen.get_resname(segid="P0", resid="2") == "LEU"
    assert gen.get_resname(segid="P1", resid=29) == "SER"

    # Patches that were actually applied, and segment end patches
    expected_applied = [("DISU", "P0", "10"), ("DISU", "P0", "15")]
    assert gen.get_patches() == expected_applied
    assert gen.get_first(segid="P0") is None
    assert gen.get_last(segid="P1") is None

    # Per-atom queries
    cys_names = ['N', 'HN', 'CA', 'HA', 'CB', 'HB1', 'HB2', 'SG', 'C', 'O']
    assert gen.get_atom_names(segid="P0", resid="10") == cys_names
    assert set(gen.get_masses(segid="P0", resid=1)) == {1.008, 12.011, 15.999}
    assert gen.get_atom_indices(segid="P1", resid=0) == list(range(1, 7))

    cys_charges = {-0.47, 0.07, 0.09, 0.31, -0.1, -0.08, 0.51, -0.51}
    assert set(gen.get_charges(segid="P0", resid="10")) == cys_charges

    # Coordinates and velocities
    assert len(gen.get_coordinates(segid="P1", resid=25)) == 17
    assert set(gen.get_velocities(segid="P1", resid=1)) == {(0., 0., 0.)}
Пример #4
0
def test_single_chain(tmpdir):
    """
    Builds a small solvated protein system and checks the written output
    """

    from psfgen import PsfGen
    outdir = str(tmpdir.mkdir("single_chain"))
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf",
                    "top_all36_prot.rtf",
                    "top_water_ions.rtf"):
        gen.read_topology(topfile)

    # Each segment is built from, and takes its coordinates from, one PDB.
    # Waters are split over several files to avoid PDB limitations.
    segments = [
        ("P0", "psf_protein_P0.pdb"),  # protein
        ("P1", "psf_protein_P1.pdb"),
        ("W0", "psf_wat_0.pdb"),       # waters
        ("W1", "psf_wat_1.pdb"),
        ("I", "psf_ions.pdb"),         # ions
    ]
    for segid, pdbfile in segments:
        gen.add_segment(segid=segid, pdbfile=pdbfile)
        gen.read_coords(segid=segid, filename=pdbfile)

    # Disulfide bridges, within and between the protein chains
    disulfides = [
        [("P0", "10"), ("P0", "15")],
        [("P0", "24"), ("P1", "23")],
        [("P0", "11"), ("P1", "11")],
    ]
    for pair in disulfides:
        gen.patch(patchname="DISU", targets=pair)

    # Rebuild angle and dihedral lists after patching
    gen.regenerate_angles()
    gen.regenerate_dihedrals()

    # Output goes to the temporary directory
    os.chdir(outdir)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Reload the result with vmd-python and validate it
    molid = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    check_correctness(molid)
    molecule.delete(molid)
Пример #5
0
def test_case_sensitivity():
    """
    Checks the case sensitivity setting. Two objects are needed since the
    setting cannot be changed once topology files have been read.
    """

    from psfgen import PsfGen
    os.chdir(dir)

    topfile = "top_casesensitive.rtf"

    sensitive = PsfGen(case_sensitive=True, output=os.devnull)
    sensitive.read_topology(topfile)
    assert sensitive.get_residue_types() == ["ACE", "Ace"]

    # Changing the setting after reading a topology must be rejected
    with pytest.raises(ValueError):
        sensitive.case_sensitive = False
    del sensitive

    # Changing it before any topology is read is allowed
    insensitive = PsfGen(case_sensitive=True, output=os.devnull)
    insensitive.case_sensitive = False
    insensitive.read_topology(topfile)
    assert insensitive.get_residue_types() == ["ACE"]
Пример #6
0
def test_delete():
    """
    Checks deletion of a single atom, a whole residue, and a whole segment
    """
    from psfgen import PsfGen
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    gen.add_segment(segid="P0", pdbfile="psf_protein_P0.pdb")
    gen.read_coords(segid="P0", filename="psf_protein_P0.pdb")

    # Remove one named atom from residue 1
    first_residue = dict(segid="P0", resid=1)
    assert "CAY" in gen.get_atom_names(**first_residue)
    gen.delete_atoms(atomname="CAY", **first_residue)
    assert "CAY" not in gen.get_atom_names(**first_residue)

    # Remove all of residue 1 (a capping group)
    assert gen.get_resids("P0") == list(map(str, range(1, 26)))
    gen.delete_atoms(**first_residue)
    assert gen.get_resids("P0") == list(map(str, range(2, 26)))

    # Create a second segment, then remove it entirely
    gen.add_segment(segid="DELETE", pdbfile="psf_protein_P1.pdb")
    assert gen.get_segids() == ["P0", "DELETE"]
    gen.delete_atoms(segid="DELETE")
    assert gen.get_segids() == ["P0"]
Пример #7
0
def test_mutation(tmpdir):
    """
    Mutates residue 2 of chain 0 from LEU to ALA, which also exercises
    coordinate guessing and manual coordinate setting
    """
    from psfgen import PsfGen
    outdir = str(tmpdir.mkdir("mutation"))
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    protein_pdb = "psf_protein_P0.pdb"
    gen.add_segment(segid="P0", pdbfile=protein_pdb, mutate=[("2", "ALA")])
    gen.read_coords(segid="P0", filename=protein_pdb)
    gen.patch(patchname="DISU", targets=[("P0", "10"), ("P0", "15")])

    # Atoms new in the ALA residue have no coordinates yet
    gen.guess_coords()

    # Place one atom by hand
    manual_position = (1.0, 2.0, 3.0)
    gen.set_position(segid="P0",
                     resid="2",
                     atomname="HB1",
                     position=manual_position)

    # Rebuild angle and dihedral lists
    gen.regenerate_angles()
    gen.regenerate_dihedrals()

    # Output goes to the temporary directory
    os.chdir(outdir)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Inspect the result with vmd-python
    molid = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    assert len(set(atomsel("protein").fragment)) == 1
    assert len(set(atomsel("resname ACE NMA NME").residue)) == 2

    # Residue 2 must now be ALA rather than LEU
    assert set(atomsel("resid 2").resname) == {"ALA"}

    # The guessed HB3 atom must have moved off the origin on every axis
    assert atomsel("resid 2 and name HB3").x != [0.0]
    assert atomsel("resid 2 and name HB3").y != [0.0]
    assert atomsel("resid 2 and name HB3").z != [0.0]

    # The hand-placed HB1 atom must be exactly where it was put
    assert atomsel("resid 2 and name HB1").x == [1.0]
    assert atomsel("resid 2 and name HB1").y == [2.0]
    assert atomsel("resid 2 and name HB1").z == [3.0]

    molecule.delete(molid)
Пример #8
0
def test_ends(tmpdir):
    """
    Tests adding patches to the beginning and end, as well as adding
    residues in the segment
    """
    from psfgen import PsfGen
    # Directory named after this test (was a copy-paste of "mutation")
    p = str(tmpdir.mkdir("ends"))
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    gen.read_topology("top_all36_prot.rtf")

    # Add neutral N-terminus
    # Add an alanine then a protonated glutamate at the C-terminus.
    gen.add_segment(segid="P",
                    pdbfile="protein_nocaps.pdb",
                    first="NTER",
                    last="GLUP",
                    residues=[("25", "ALA"), ("26", "GLU")])

    # Set coordinates and guess positions of the atoms that were added
    gen.read_coords(segid="P", filename="protein_nocaps.pdb")
    gen.guess_coords()

    # Check internal state
    assert gen.get_resids("P") == [str(_) for _ in range(2, 27)]
    assert gen.get_resname(segid="P", resid=25) == "ALA"
    assert gen.get_patches(list_defaults=True) == [('GLUP', 'P', '26'),
                                                   ('NTER', 'P', '2')]
    assert gen.get_first(segid="P") == "NTER"
    assert gen.get_last(segid="P") == "GLUP"

    # Output
    os.chdir(p)
    gen.write_psf(filename="output.psf")
    gen.write_pdb(filename="output.pdb")

    # Check all resids are present and that 2 extra ones were added.
    # Sets have no guaranteed iteration order, so sort before comparing
    # against the ordered range.
    m = molecule.load("psf", "output.psf", "pdb", "output.pdb")
    assert sorted(set(atomsel("all").resid)) == list(range(2, 27))
    assert len(atomsel("all")) == 382
    assert set(atomsel("resid 25").resname) == {"ALA"}

    # Check patches were applied correctly
    assert "HT1" in atomsel("resid 2").name
    assert "HN" not in atomsel("resid 2").name
    assert "HE2" in atomsel("resid 26").name

    # Check all coordinates are set
    assert 0.0 not in atomsel("all").x
    assert 0.0 not in atomsel("all").y
    assert 0.0 not in atomsel("all").z

    molecule.delete(m)
Пример #9
0
def test_alias():
    """
    Checks that residue and atom aliases map PDB names onto topology names
    """
    from psfgen import PsfGen
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    # (topology name, name used in the PDB file)
    residue_aliases = [("LEU", "LEX"), ("ARG", "AAA")]
    for top_name, pdb_name in residue_aliases:
        gen.alias_residue(top_resname=top_name, pdb_resname=pdb_name)
    gen.alias_atom(top_atomname="N", pdb_atomname="NOOO", resname="PHE")

    # Segment is built from the renamed PDB, coordinates from the original
    gen.add_segment(segid="P", pdbfile="protein_newnames.pdb")
    gen.read_coords(segid="P", filename="psf_protein_P0.pdb")

    assert gen.get_resname(segid="P", resid=2) == "LEU"
    assert gen.get_resname(segid="P", resid=5) == "ALA"
    assert "N" in gen.get_atom_names(segid="P", resid=23)
Пример #10
0
def test_set():
    """
    Checks each setter by reading the value back through its query method
    """
    from psfgen import PsfGen
    os.chdir(dir)

    gen = PsfGen(output=os.devnull)
    for topfile in ("top_all36_caps.rtf", "top_all36_prot.rtf"):
        gen.read_topology(topfile)

    gen.add_segment(segid="P", pdbfile="psf_protein_P1.pdb")
    assert gen.get_segids() == ["P"]

    # Segment name
    gen.set_segid(segid="P", new_segid="P1")
    assert gen.get_segids() == ["P1"]

    gen.read_coords(segid="P1", filename="psf_protein_P1.pdb")

    # All remaining checks address residue 1 of the renamed segment
    target = dict(segid="P1", resid="1")

    # Residue name
    assert gen.get_resname(**target) == "ASP"
    gen.set_resname(new_resname="ASH", **target)
    assert gen.get_resname(**target) == "ASH"

    # Charge
    gen.set_charge(atomname="O", charge=-1., **target)
    assert -1.0 in gen.get_charges(**target)

    # Atom name
    gen.set_atom_name(atomname="N", new_atomname="NO", **target)
    renamed_atoms = gen.get_atom_names(**target)
    assert "N" not in renamed_atoms
    assert "NO" in gen.get_atom_names(**target)

    # Coordinate
    new_position = (0., 0., -1.)
    gen.set_position(atomname="HN", position=new_position, **target)
    assert new_position in gen.get_coordinates(**target)

    # Velocity
    new_velocity = (5., 5., 3.)
    gen.set_velocity(atomname="NO", velocity=new_velocity, **target)
    assert new_velocity in gen.get_velocities(**target)
Пример #11
0
class CharmmWriter(MoleculeWriter):
    """
    An object that handles all the conversions to a psf file
    by interfacing with psfgen.

    Writes a pdb/psf file pair from the current molecule using the
    CHARMM36 topology and atom names/types. Interfaces with psfgen by
    dynamically generating the .tcl file that psfgen takes as input.
    Prompts the user for additional topology files and helps with
    matching atom names that cannot be automatically translated to the
    charmm naming conventions.
    """

    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                               CONSTANTS                                  #
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    WATER_NAMES = {
        "tip3": "TIP3",
        "tip4e": "TP4E",
        "spce": "SPCE",
    }
    WATER_O_NAME = "OH2"
    WATER_H_NAMES = ["H1", "H2"]

    #==========================================================================

    def __init__(self, molid, **kwargs):
        """
        Sets up a writer producing CHARMM-format output for a molecule

        Args:
            molid (int): VMD molecule ID of system to write
            tmp_dir (str): Directory for temporary files. Defaults to "."
            lipid_sel (str): Lipid selection string. Defaults to "lipid"
            hmr (bool): If hydrogen masses should be repartitioned. Defaults
                to False
            forcefield (str): Forcefield to use. Defaults to "charmm"
            water_model (str): Water model to use. Defaults to "tip3"
            extra_topos (list of str): Additional topology (.str, .off, .lib) to
                include.
            extra_params (list of str): Additional parameter sets (.str, .frcmod)
            override_defaults (bool): If set, omits default forcefield parameters.
            debug_verbose (bool): Prints additional output, like from psfgen.

        Raises:
            DabbleError: if hmr is requested together with a CHARMM forcefield
        """

        # The parent class handles the options shared by all writers
        super(CharmmWriter, self).__init__(molid, **kwargs)

        # Handle to the psfgen library
        self.psfgen = PsfGen()

        # Forcefield and water model, with their default file sets
        self.forcefield = kwargs.get("forcefield", "charmm")
        self.water_model = kwargs.get("water_model", "tip3")

        default_topologies = self.get_topologies(self.forcefield,
                                                 self.water_model)
        default_parameters = self.get_parameters(self.forcefield,
                                                 self.water_model)
        self.topologies = default_topologies
        self.parameters = default_parameters

        if "charmm" in self.forcefield and self.hmr:
            raise DabbleError("HMR not supported with CHARMM ff yet")

        # An override discards the defaults computed above
        if self.override:
            self.topologies, self.parameters = [], []

        # User-supplied extras (stored on self by the parent __init__)
        self.topologies.extend(self.extra_topos)
        self.parameters.extend(self.extra_params)

        # The matcher is only built for forcefields that use CHARMM-style
        # topologies directly, not when a conversion is performed
        if any(name in self.forcefield for name in ("charmm", "opls")):
            self.matcher = CharmmMatcher(self.topologies)

        # Counter used to number generated segments
        self.segint = 0

    #=========================================================================

    def write(self, filename):
        """
        Produces the output files for the molecule with the chosen forcefield

        Args:
            filename (str): File name to write. File type suffix will be added.

        Raises:
            DabbleError: if the forcefield is not an amber, charmm, or opls one
        """
        self.outprefix = filename

        # Make our molecule the top one for the duration of the write
        previous_top = molecule.get_top()
        molecule.set_top(self.molid)

        if "amber" in self.forcefield:
            # AMBER systems are parameterized by AmberWriter and the result
            # is then converted. Imported here to avoid a circular import.
            from dabble.param import AmberWriter
            amber_options = {
                "molid": self.molid,
                "tmp_dir": self.tmp_dir,
                "forcefield": self.forcefield,
                "water_model": self.water_model,
                "hmr": self.hmr,
                "lipid_sel": self.lipid_sel,
                "extra_topos": self.extra_topos,
                "extra_params": self.extra_params,
                "override_defaults": self.override,
                "debug_verbose": self.debug,
            }
            AmberWriter(**amber_options).write(self.outprefix)
            self._prmtop_to_charmm()

        elif "charmm" in self.forcefield or "opls" in self.forcefield:
            # CHARMM and OPLS both go straight through psfgen
            self._run_psfgen()

        else:
            raise DabbleError("Unsupported forcefield '%s' for CharmmWriter" %
                              self.forcefield)

        # Validate what was written
        self._check_psf_output()

        # Hand the top slot back to whichever molecule had it
        molecule.set_top(previous_top)

    #=========================================================================
    #                           Static methods                               #
    #=========================================================================

    @classmethod
    def get_topologies(cls, forcefield, water_model):
        """
        Lists the default topology files for a forcefield and water model

        Args:
            forcefield (str): One of "charmm", "opls", or "amber"
            water_model (str): Water model name. For "charmm", the models
                "tip3", "tip4e" and "spce" each add a water/ion file; any
                other value adds none.

        Returns:
            (list): Each default file name passed through
                _get_forcefield_path. For "amber", whatever
                AmberWriter.get_topologies returns.

        Raises:
            DabbleError: if "opls" is combined with a water model other
                than "tip3"
            ValueError: if the forcefield is not one of the three above
        """
        if forcefield == "charmm":
            names = [
                "top_all36_caps.rtf", "top_all36_cgenff.rtf",
                "top_all36_prot.rtf", "top_all36_lipid.rtf",
                "top_all36_carb.rtf", "top_all36_na.rtf",
                "toppar_all36_prot_na_combined.str",
                "toppar_all36_prot_fluoro_alkanes.str"
            ]
            water_files = (
                ("tip3", "toppar_water_ions.str"),
                ("tip4e", "toppar_water_ions_tip4p_ew.str"),
                ("spce", "toppar_water_ions_spc_e.str"),
            )
            for model, water_file in water_files:
                if water_model == model:
                    names.append(water_file)
                    break

        elif forcefield == "opls":
            if water_model != "tip3":
                raise DabbleError("Only TIP3 water model supported for OPLS")
            names = ["opls_aam.rtf", "opls_aam_caps.rtf"]

        elif forcefield == "amber":
            from dabble.param import AmberWriter  # avoid circular dependency
            return AmberWriter.get_topologies(forcefield, water_model)

        else:
            raise ValueError("Invalid forcefield: '%s'" % forcefield)

        return list(map(cls._get_forcefield_path, names))

    #=========================================================================

    @classmethod
    def get_parameters(cls, forcefield, water_model):
        """
        Lists the default parameter files for a forcefield and water model

        Args:
            forcefield (str): One of "charmm", "amber", or "opls"
            water_model (str): Water model name. For "charmm", the models
                "tip3", "tip4e" and "spce" each add a water/ion file; any
                other value adds none.

        Returns:
            (list): Each default file name passed through
                _get_forcefield_path. For "amber", whatever
                AmberWriter.get_parameters returns.

        Raises:
            DabbleError: if "opls" is combined with a water model other
                than "tip3"
            ValueError: if the forcefield is not one of the three above
        """
        if forcefield == "charmm":
            names = [
                "par_all36m_prot.prm", "par_all36_cgenff.prm",
                "par_all36_lipid.prm", "par_all36_carb.prm",
                "par_all36_na.prm", "toppar_all36_prot_na_combined.str"
            ]
            water_files = (
                ("tip3", "toppar_water_ions.str"),
                ("tip4e", "toppar_water_ions_tip4p_ew.str"),
                ("spce", "toppar_water_ions_spc_e.str"),
            )
            for model, water_file in water_files:
                if water_model == model:
                    names.append(water_file)
                    break

        elif forcefield == "amber":
            from dabble.param import AmberWriter  # avoid circular dependency
            return AmberWriter.get_parameters(forcefield, water_model)

        elif forcefield == "opls":
            if water_model != "tip3":
                raise DabbleError("Only TIP3 water model supported for OPLS")
            names = ["opls_aam.prm"]

        else:
            raise ValueError("Invalid forcefield: '%s'" % forcefield)

        return list(map(cls._get_forcefield_path, names))

    #=========================================================================
    #                           Private methods                              #
    #=========================================================================

    def _write_water_blocks(self):
        """
        Adds the waters to psfgen as segments W0, W1, ..., one per
        temporary PDB file. The waters are split over several files because
        psfgen cannot handle more than 10000 of a residue in one file.
        """
        # Rename the waters, then dump them to PDB file(s)
        self._set_water_names()

        for index, pdbfile in enumerate(self._write_water_pdbs()):
            segid = "W%d" % index
            self.psfgen.add_segment(segid=segid, pdbfile=pdbfile)
            self.psfgen.read_coords(segid=segid, filename=pdbfile)

        # Plain TIP3 needs nothing further
        if self.water_model == "tip3":
            return

        # Other water models include dummy atoms whose coordinates have to
        # be guessed. Doing it here is safe: only waters are in the psfgen
        # state so far, so genuinely broken atoms elsewhere can't be
        # silently repaired.
        self.psfgen.guess_coords()
        self.psfgen.regenerate_angles()
        self.psfgen.regenerate_dihedrals()

    #==========================================================================

    def _write_lipid_blocks(self):
        """
        Renames and renumbers the lipids, writes them to a temporary PDB
        file, and adds that file to psfgen as segment "L". Renumbering is
        needed because large systems can have **** in place of an integer
        resid, which will crash psfgen.

        Raises:
            ValueError if lipids are present with an AMBER or OPLS forcefield
            NotImplementedError if 10,000 or more lipids are present, since
              feeding multiple lipid blocks to psfgen is not supported
        """
        # Work on our molecule as the top one to keep selections simple
        previous_top = molecule.get_top()
        molecule.set_top(self.molid)

        # All lipid atoms that are still flagged for writing
        lipid_atoms = atomsel('(%s) and user 1.0' % self.lipid_sel)
        lipid_count = len(set(lipid_atoms.residue))

        # Lipids can only be written with CHARMM parameters here
        wrong_ff = "amber" in self.forcefield or "opls" in self.forcefield
        if lipid_atoms and wrong_ff:
            raise ValueError(
                "AMBER or OPLS parameters not supported for lipids"
                " in CHARMM output format")

        # Everything has to fit in a single segment
        if lipid_count >= 10000:
            raise NotImplementedError("More than 10k lipids found")

        # Rename and renumber one residue name at a time
        renamed_residues = []
        for lipid_name in set(lipid_atoms.resname):
            renamed_residues.extend(
                self._rename_by_resname(lipid_name, renumber=True))

        # Temporary PDB file that psfgen will read
        handle, pdbfile = tempfile.mkstemp(suffix='.pdb',
                                           prefix='psf_lipid_',
                                           dir=self.tmp_dir)
        os.close(handle)

        residue_list = ' '.join(str(res) for res in renamed_residues)
        written = atomsel("residue %s" % residue_list)
        written.user = 0.0  # clear the flag on the atoms being written
        written.write('pdb', pdbfile)

        # Build the lipid segment from the file
        self.psfgen.add_segment(segid="L", pdbfile=pdbfile)
        self.psfgen.read_coords(segid="L", filename=pdbfile)

        # Hand the top slot back
        molecule.set_top(previous_top)

    #==========================================================================

    def _write_ion_blocks(self):
        """
        Writes a temporary PDB file containing correctly named ions and
        adds it to psfgen as segment "I". Ions are taken to be all atoms
        with no bonds. Does nothing if no ions are found.
        """

        # Put our molecule on top to simplify atom selection language
        old_top = molecule.get_top()
        molecule.set_top(self.molid)

        # The previous top molecule is restored on every exit path,
        # including the early return when there are no ions
        try:
            # Select all ions
            allions = []
            for resname in set(atomsel("numbonds 0").resname):
                allions.extend(self._rename_by_resname(resname, renumber=True))

            # Stop if no ions were found
            if not allions:
                return

            # Save ions as pdb, numbered sequentially, and clear the user
            # flag on them
            allsel = atomsel("residue %s" % " ".join(str(_) for _ in allions))
            allsel.resid = range(len(allsel))
            allsel.user = 0.0
            fileno, temp = tempfile.mkstemp(suffix=".pdb",
                                            prefix="psf_ions_",
                                            dir=self.tmp_dir)
            os.close(fileno)
            allsel.write("pdb", temp)

            self.psfgen.add_segment(segid="I", pdbfile=temp)
            self.psfgen.read_coords(segid="I", filename=temp)
        finally:
            molecule.set_top(old_top)

    #==========================================================================

    def _find_single_residue_names(self, resname, molid):
        """
        Uses graph matcher and available topologies to match up
        ligand names automatically. Each residue is first matched against
        the residue definitions, and if that fails, against patched
        residue definitions.

        Args:
          resname (str): Residue name of the ligand that will be written.
            All ligands will be checked separately against the graphs.
          molid (int): VMD molecule ID to consider

        Returns:
          (list of ints): Residue numbers (not resid) of all input ligands
            that were successfully matched. Need to do it this way since
            residue names can be changed in here to different things.

        Raises:
          DabbleError if number of resids does not match number of residues
            as interpreted by VMD
          NotImplementedError if a residue could not be matched to a graph.
        """
        # Put our molecule on top
        old_top = molecule.get_top()
        molecule.set_top(molid)

        # Sanity check that there is no discrepancy between defined resids and
        # residues as interpreted by VMD.
        flagged = "user 1.0 and resname '%s'" % resname
        residues = set(atomsel(flagged).residue)

        for chain in set(atomsel(flagged).chain):
            chainsel = atomsel("%s and chain %s" % (flagged, chain))
            tempres = set(chainsel.residue)
            resids = set(chainsel.resid)
            if len(tempres) != len(resids):
                raise DabbleError("VMD found %d residues for resname '%s', "
                                  "but there are %d resids in chain %s! "
                                  "Check input." %
                                  (len(tempres), resname, len(resids), chain))

        for residue in residues:
            sel = atomsel("residue %s and resname '%s' and user 1.0" %
                          (residue, resname))

            newname, atomnames = self.matcher.get_names(sel,
                                                        print_warning=True)
            if not newname:
                # No plain residue matched, so try patched residues. The
                # result must land in newname: it is what gets checked and
                # applied below, and resname has to stay intact for the
                # selections of the remaining residues.
                newname, patch, atomnames = self.matcher.get_patches(sel)

                if not newname:
                    print(
                        "ERROR: Could not find a residue definition for %s:%s"
                        % (resname, residue))
                    raise NotImplementedError(
                        "No residue definition for %s:%s" % (resname, residue))
                print("\tApplying patch %s to ligand %s" % (patch, newname))

            # Do the renaming
            self._apply_naming_dictionary(atomnames=atomnames,
                                          resnames=newname,
                                          verbose=True)

        molecule.set_top(old_top)

        return list(residues)

    #==========================================================================

    def _write_generic_block(self, residues):
        """
        Writes the given residues to a temporary PDB file and adds them to
        psfgen as one new segment, named "B" plus a running number

        Args:
          residues (list of int): Residue numbers to be written. Will all
            be written to one segment.

        Returns:
          True if successful
        """
        # Work on our molecule as the top one to keep selections simple
        previous_top = molecule.get_top()
        molecule.set_top(self.molid)

        residue_list = " ".join(str(res) for res in residues)
        block = atomsel('user 1.0 and residue %s' % residue_list)

        # Dump the residues to a temporary file, then clear their user flag
        handle, pdbfile = tempfile.mkstemp(suffix='.pdb',
                                           prefix='psf_block_',
                                           dir=self.tmp_dir)
        os.close(handle)
        block.write('pdb', pdbfile)
        block.user = 0.0

        # Claim the next free segment number
        segid = "B%d" % self.segint
        self.segint += 1
        self.psfgen.add_segment(segid=segid, pdbfile=pdbfile)
        self.psfgen.read_coords(segid=segid, filename=pdbfile)

        # Only restore the previous top molecule if there was one
        if previous_top != -1:
            molecule.set_top(previous_top)
        return True

    #==========================================================================

    def _write_protein_blocks(self, molid, frag):
        """
        Renames the residues and atoms of one protein fragment, writes the
        fragment to a pdb file, and feeds it to psfgen as a segment.

        Patches found for individual residues are applied to the segment
        here. Patches returned by the disulfide matcher are collected and
        returned to the caller without being applied.

        Args:
            molid (int): VMD molecule ID of renumbered protein
            frag (str): Fragment to write

        Returns:
            (set of Patches): Patches collected from the disulfide matcher,
                not applied by this method
        """

        print("Setting protein atom names")

        # Selections below omit the molid, so this molecule must be on top
        previous_top = molecule.get_top()
        molecule.set_top(molid)
        residue_patches = set()
        disulfide_patches = set()

        # Get a unique and reliable segment name
        segid = self.matcher.get_protein_segname(molid, frag)
        fragsel = atomsel("fragment '%s'" % frag)

        for residue in list(set(fragsel.residue)):
            ressel = atomsel('residue %s' % residue)
            resid = ressel.resid[0]

            # Three or more extra-residue atoms: try the disulfide matcher.
            # Otherwise try to match it as a single amino acid.
            if len(self.matcher.get_extraresidue_atoms(ressel)) >= 3:
                newname, patch, atomnames = self.matcher.get_disulfide(
                    "residue %d" % residue, molid)
                if newname:
                    disulfide_patches.add(patch)
            else:
                newname, atomnames = self.matcher.get_names(ressel, False)

            # No match yet. See if it's a patched residue
            if not newname:
                newname, patchname, atomnames = \
                        self.matcher.get_patches(ressel)
                if not newname:
                    raise DabbleError("Couldn't find a patch for %s:%s" %
                                      (ressel.resname[0], resid))
                # The matcher gives a patch name only, so build the Patch
                residue_patches.add(
                    Patch(name=patchname, segids=[segid], resids=[resid]))

            # Do the renaming
            self._apply_naming_dictionary(atomnames=atomnames,
                                          resnames=newname)

        # Save protein chain in the correct order
        basename = '/psf_protein_%s.pdb' % segid
        pdb_path = self.tmp_dir + basename
        _write_ordered_pdb(pdb_path, "fragment '%s'" % frag, molid)
        natoms = len(atomsel("fragment %s" % frag))
        print("\tWrote %d atoms to the protein segment %s" % (natoms, segid))

        # Now invoke psfgen for the protein segment
        self.psfgen.add_segment(segid=segid, pdbfile=pdb_path)

        print("Applying the following single-residue patches to P%s:\n" % frag)
        print("\t%s" % "\t".join(map(str, residue_patches)))
        for respatch in residue_patches:
            self.psfgen.patch(patchname=respatch.name,
                              targets=respatch.targets())

        self.psfgen.read_coords(segid=segid, filename=pdb_path)

        # Fix coordinates that are out of bounds, ie 5 characters, by
        # setting the position of each such atom directly in psfgen
        far_selection = ("fragment '%s' and (abs(x) >= 100 or abs(y) >= 100 "
                         "or abs(z) >= 100)" % frag)
        for index in atomsel(far_selection, molid).index:
            far_atom = atomsel("index %d" % index, molid)
            coords = (far_atom.x[0], far_atom.y[0], far_atom.z[0])
            self.psfgen.set_position(segid=segid,
                                     resid=far_atom.resid[0],
                                     atomname=far_atom.name[0],
                                     position=coords)

        # -1 is what get_top returned, so there is nothing to restore
        if previous_top != -1:
            molecule.set_top(previous_top)

        # Set the user field of every atom in the fragment to 0.0
        fragsel.user = 0.0
        return disulfide_patches

    #==========================================================================

    def _check_psf_output(self):
        """
        Scans the output pdb from psfgen for atoms where the coordinate
        could not be set, indicating an unmatched atom. This check is necessary
        because sometimes psfgen will run with no errors or warnings but will
        have unmatched atoms that are all at (0,0,0).

        The output pdb is loaded into VMD only for the duration of the check:
        the temporary molecule is deleted afterwards and the previous top
        molecule is restored.

        Raises:
            DabbleError: If the output pdb file does not exist, or if it
                contains atoms with an occupancy of -1
        """
        pdbfile = '%s.pdb' % self.outprefix

        # Check file was written at all
        if not os.path.isfile(pdbfile):
            raise DabbleError("\nERROR: psf file failed to write.\n"
                              "       Please see log above.\n")

        # Open the pdb file in VMD and check for atoms with no occupancy.
        # Loading changes the top molecule, so remember it for later.
        old_top = molecule.get_top()
        fileh = molecule.load('pdb', pdbfile)
        try:
            errors = atomsel("occupancy=-1", molid=fileh)
            # Collect the report now, while the molecule still exists
            missing = [
                "\t%s%s:%s\n" % atom
                for atom in zip(errors.resname, errors.resid, errors.name)
            ]
        finally:
            # Don't leave the scratch molecule loaded in VMD
            molecule.delete(fileh)
            if old_top != -1:
                molecule.set_top(old_top)

        # Print out error messages
        if missing:
            raise DabbleError(
                "\nERROR: Couldn't find the following atoms.\n"
                + "".join(missing)
                + "Check if they are present in the original structure.\n")

        print("\nChecked output pdb/psf has all atoms present "
              "and correct.\n")

    #==========================================================================

    def _find_residue_in_rtf(self, resname, molid):
        """
        Scans the input topology files to find a name match for the given
        residue name, then pulls out the atoms involved and checks that they
        are all present in the input coordinates, prompting the user to correct
        the names of atoms that could not be matched.

        Only atoms still flagged with user 1.0 are considered. The first
        topology file that defines the residue is used. Renaming is
        interactive: new atom names are read from standard input.

        Args:
          resname (str): Residue name to check
          molid (int): VMD molecule ID

        Returns:
          True if all matching was successful
          False if the residue name cannot be found

        Raises:
          DabbleError: If the number of distinct atom names in the structure
            differs from the number defined in the topology
        """

        print("Finding residue name '%s'" % resname)
        # Stays None when there are no topology files to search
        topo_atoms = None
        for top in self.topologies:
            with open(top, 'r') as topfile:
                topo_atoms = _get_atoms_from_rtf(text=topfile.readlines(),
                                                 resname=resname)
            # Use first definition found of this residue
            if topo_atoms:
                break
        if not topo_atoms:
            return False
        print("Successfully found residue %s in input topologies" % resname)

        # Match up atoms with python sets
        pdb_atoms = set(
            atomsel("resname '%s' and user 1.0" % resname, molid=molid).name)
        pdb_only = pdb_atoms - topo_atoms
        topo_only = topo_atoms - pdb_atoms

        # If uneven number of atoms, there are missing or additional atoms
        if len(pdb_atoms) > len(topo_atoms):
            raise DabbleError(
                "\nERROR: Cannot process modified residue %s.\n"
                "There are %d extra atoms in the input structure "
                "that are undefined in the topology file. The "
                "following atoms could not be matched and may "
                "either be misnamed, or additional atoms:\n"
                "[ %s ]\n" % (resname, len(pdb_atoms) - len(topo_atoms),
                              " ".join(pdb_only)))

        if len(topo_atoms) > len(pdb_atoms):
            raise DabbleError(
                "\nERROR: Cannot process modified residue %s.\n"
                "There are %d missing atoms in the input structure "
                "that are defined in the topology file. The "
                "following atoms could not be matched and may "
                "either be misnamed or deleted atoms:\n"
                "[ %s ]\n" % (resname, len(topo_atoms) - len(pdb_atoms),
                              " ".join(topo_only)))

        # Nothing to rename: every atom name has a topology counterpart
        if not pdb_only:
            print("Matched up all atom names for resname '%s'\n" % resname)
            return True

        # Offer to rename atoms that couldn't be matched to the topology
        print("\nWARNING: Having some trouble with modified residue %s.\n"
              "         The following atom names cannot be matched up "
              " to the input topologies. They are probably "
              " misnamed.\n" % resname)
        print("         To help you, here are the atom names that "
              " should be present according to the topology "
              " but were not found:\n")
        print("         [ %s ]\n" % ' '.join([str(t) for t in topo_only]))
        print(" Please enter a valid name for each atom as "
              "it appears or CTRL+D to quit..\n")
        for unmatched in pdb_only:
            print("Unmatched topology names: [ %s ]" % ' '.join(topo_only))

            newname = input("  %s  -> " % unmatched)
            while newname not in topo_only:
                print("'%s' is not an available name in the topology."
                      "Please try again.\n" % newname)
                newname = input("  %s  -> " % unmatched)

            # Select explicitly in molid rather than whatever is on top.
            # resname must stay the residue name: newname is an atom name.
            atomsel("resname '%s' and user 1.0 and name '%s'" %
                    (resname, unmatched), molid=molid).name = newname
            pdb_atoms = set(
                atomsel("resname '%s' and user 1.0" % resname,
                        molid=molid).name)
            # Names already handed out are no longer on offer
            topo_only = topo_atoms - pdb_atoms

        # Recurse to check that everything is assigned correctly, and
        # report that result to the caller
        return self._find_residue_in_rtf(resname, molid)

    #==========================================================================

    def _get_patch(self, seg, resid):
        """
        Prompts the user for a patch to apply for the given residue.
        Gathers available patches from topology files

        Args:
          seg (str): Segment to apply the patch to
          resid (int): Residue ID to apply the patch to

        Returns:
          (str) patch line to put in the psfgen input file
        """
        avail_patches = self._get_avail_patches()
        print("What is the patch name I should apply?")
        print("Type NONE for no patch, if your residue is completely "
              "defined in a str file")
        print("Or type HELP for a list of all patches I know about")
        patchname = input("> ")
        if patchname == "HELP":
            print("   PATCH     COMMENT")
            print("   -----     -------")
            for patch in avail_patches:
                print("%7s %s" % (patch, avail_patches[patch]))
            patchname = input("> ")
        while (patchname not in avail_patches) and (patchname != "NONE"):
            print("I don't know about patch %s" % patchname)
            patchname = input("Try again > ")
        if patchname == "NONE":
            return ""

        return "patch %s %s:%d\n" % (patchname, seg, resid)

    #==========================================================================

    def _get_avail_patches(self):
        """
        Gathers the patches defined in all topology files.

        Returns:
          (dict str -> str): Patch names as keys, comment as value
        """
        avail_patches = {}
        for top in self.topologies:
            topfile = open(top, 'r')
            for line in topfile:
                tokens = line.split()
                if not tokens:
                    continue
                if tokens[0] == "PRES":
                    comment = ' '.join(tokens[tokens.index("!") + 1:])
                    avail_patches[tokens[1]] = comment
        return avail_patches

    #==========================================================================

    def _run_psfgen(self):
        """
        Builds the whole system in psfgen and writes the output psf and pdb.

        Reads the topology files, flags every atom as unsaved (user 1.0),
        then writes water, ions, lipids and protein fragments in turn.
        Whatever is still flagged unsaved afterwards is written one residue
        name at a time as generic blocks. Output goes to
        "<outprefix>.psf" and "<outprefix>.pdb".
        """

        # Read topology files in to psfgen
        print("Using the following topologies:")
        for top in self.topologies:
            print("  - %s" % os.path.split(top)[1])
            self.psfgen.read_topology(top)

        # Mark all atoms as unsaved with the user field
        atomsel('all', molid=self.molid).user = 1.0
        check_atom_names(molid=self.molid)

        # Save water 10k molecules at a time
        if atomsel('water', molid=self.molid):
            self._write_water_blocks()

        # Now ions if present, changing the atom names
        if atomsel('ions', molid=self.molid):
            self._write_ion_blocks()

        # Now lipid. Select in our molecule explicitly, like every other
        # selection here, rather than in whichever molecule is on top
        if atomsel(self.lipid_sel, molid=self.molid):
            self._write_lipid_blocks()

        # Now handle the protein, one fragment at a time
        protein = atomsel("resname %s" % PATCHABLE_ACIDS, molid=self.molid)
        if protein:
            extpatches = set()
            for frag in sorted(set(protein.fragment)):
                extpatches.update(self._write_protein_blocks(self.molid, frag))

            # List all patches applied to the protein
            print("Applying the following patches:\n")
            print("\t%s" % "\n\t".join(str(_) for _ in extpatches))

            # Apply all multi segment patches to the protein
            for p in extpatches:
                self.psfgen.patch(patchname=p.name, targets=p.targets())
        else:
            print("\n\tDidn't find any protein. Continuing...\n")

        # Regenerate angles and dihedrals after applying patches
        # Angles must be regenerated FIRST!
        # See http://www.ks.uiuc.edu/Research/namd/mailing_list/namd-l.2009-2010/4137.html
        self.psfgen.regenerate_angles()
        self.psfgen.regenerate_dihedrals()

        # Check if there is anything else and write it out. Sorted so that
        # segments are assigned in the same order on every run.
        leftovers = atomsel('user 1.0', molid=self.molid)
        for lig in sorted(set(leftovers.resname)):
            residues = self._find_single_residue_names(resname=lig,
                                                       molid=self.molid)
            self._write_generic_block(residues)

        # Write the output files and run
        self.psfgen.write_psf(filename="%s.psf" % self.outprefix,
                              type="x-plor")
        self.psfgen.write_pdb(filename="%s.pdb" % self.outprefix)

    #==========================================================================

    def _prmtop_to_charmm(self):
        """
        Converts an AMBER prmtop with AMBER parameters to a psf file,
        using ParmEd.

        Reads "<outprefix>.prmtop" and "<outprefix>.inpcrd", and writes
        "<outprefix>.psf" plus a "<outprefix>.pdb" holding the coordinates.
        """
        prmtop = self.outprefix + ".prmtop"
        inpcrd = self.outprefix + ".inpcrd"

        # Topology: load the AMBER pair and save it again in psf format
        structure = load_file(prmtop, xyz=inpcrd, structure=True)
        structure.save(self.outprefix + ".psf", format="psf")

        # Coordinates: load the same pair in VMD, write every atom to a
        # pdb, then delete the molecule that was loaded for this
        loaded = molecule.load("parm7", prmtop, "rst7", inpcrd)
        everything = atomsel("all", loaded)
        everything.write("pdb", self.outprefix + ".pdb")
        molecule.delete(loaded)