示例#1
0
def test_ff_assignment_doesnt_change_topology(pdb3aid):
    """Preparing a molecule with a force field must keep its chains and residues.

    The protein and ligand are split out of the fixture, the ligand gets its own
    parameters, and the recombined molecule is compared with the prepped result.
    """
    source = pdb3aid
    protein = mdt.Molecule(source.get_atoms('protein'))
    ligand = mdt.Molecule(source.get_atoms('unknown'))
    ligand_params = mdt.create_ff_parameters(ligand, charges='gasteiger')

    mdt.guess_histidine_states(protein)
    combined = protein.combine(ligand)

    forcefield = mdt.forcefields.DefaultAmber()
    forcefield.add_ff(ligand_params)

    prepped = forcefield.create_prepped_molecule(combined)

    # Global counts
    assert prepped.num_residues == combined.num_residues
    assert prepped.num_chains == combined.num_chains

    # Chain-by-chain comparison
    for new_chain, old_chain in zip(prepped.chains, combined.chains):
        assert new_chain.name == old_chain.name
        assert new_chain.num_residues == old_chain.num_residues
        assert new_chain.index == old_chain.index

    # Residue-by-residue comparison; a prepped 'HIS' may come from any histidine variant
    histidine_names = ('HIS', 'HID', 'HIE', 'HIP')
    for new_res, old_res in zip(prepped.residues, combined.residues):
        assert new_res.index == old_res.index
        if new_res.resname != 'HIS':
            assert new_res.resname == old_res.resname
        else:
            assert old_res.resname in histidine_names
        assert new_res.pdbindex == old_res.pdbindex
        assert new_res.chain.index == old_res.chain.index
        # Every atom name of the original residue must still be found in the new one
        for old_atom in old_res:
            assert old_atom.name in new_res
示例#2
0
def test_chain_rename(pdb3aid):
    """Merging two single-residue molecules should give two chains named 'A' and 'B'."""
    first = mdt.Molecule(pdb3aid.residues[3])
    second = mdt.Molecule(pdb3aid.residues[4])
    merged = mdt.Molecule([first, second])

    assert merged.num_chains == 2
    assert merged.num_residues == 2

    # Residue names are carried over in order
    for position, source in enumerate((first, second)):
        assert merged.residues[position].name == source.residues[0].name

    # Chains are named alphabetically
    for position, expected_name in enumerate(('A', 'B')):
        assert merged.chains[position].name == expected_name
示例#3
0
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Every chain, residue and atom yielded by the structure is mirrored as an
    ``mdt.Chain``, ``mdt.Residue`` and ``mdt.Atom``. Biopython doesn't assign bonds,
    and this routine does not add any to the atoms it creates (see the TODO below).

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule, named after the first element of
            ``struc.get_full_id()``
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []

    for chain in struc.get_chains():
        # Only the last element of the chain's full id is used, as the chain name
        _structure_id, _model_id, chain_id = chain.get_full_id()
        newchain = mdt.Chain(pdbname=chain_id.strip())

        for residue in chain.get_residues():
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])

            newchain.add(newresidue)

            # Iterate the residue directly rather than through the legacy
            # ``get_atom()`` accessor; Biopython entities yield their children
            for atom in residue:
                newatom = mdt.Atom(element=atom.element,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)

                newatoms.append(newatom)

    return mdt.Molecule(newatoms, name=struc.get_full_id()[0])
示例#4
0
def combine_molecules(mdtfile1, mdtfile2):
    """Read two molecule files, join their atoms into one molecule, and write it out.

    Args:
        mdtfile1: first input, passed to ``mdt.read``
        mdtfile2: second input, passed to ``mdt.read``

    The combined molecule is written to ``out.pkl``; nothing is returned.
    """
    import moldesign as mdt

    first = mdt.read(mdtfile1)
    second = mdt.read(mdtfile2)
    merged_atoms = first.atoms + second.atoms
    combined = mdt.Molecule(merged_atoms)
    combined.write('out.pkl')
示例#5
0
    def _build_mm_system(self):
        """Create the MM copy of the molecule and drop force field terms inside the QM region.

        Builds ``self.mmmol`` from ``self.mol``, cross-links each original atom with
        its MM copy (``subatom.molecule_atom`` / ``atom.props.mm_child``), assigns the
        MM energy model, then filters the bond, angle, dihedral and improper term
        lists so that only terms whose atoms are all in the ``'mm'`` subsystem remain.

        Raises:
            ValueError: if a term involves atoms from more than one subsystem
        """
        # Set up the MM subsystem - includes all atoms,
        # but we remove all FF terms except LJ
        self.mmmol = mdt.Molecule(self.mol)
        for atom, subatom in zip(self.mol.atoms, self.mmmol.atoms):
            subatom.molecule_atom = atom
            atom.props.mm_child = subatom
        self.mmmol.set_energy_model(self.mm_model)
        # Remove charges and bonds for QM atoms
        ff = self.mm_model.params.ff
        forcefield = ff.get_parameters(self.mmmol)

        def prune(termlist):  # remove intra-qm region forces
            # TODO: what about LJ forces???
            newterms = []
            for term in termlist:
                # NOTE(review): assumes each term exposes its atoms as ``term.atoms``;
                # the previous code read ``termlist.atom`` - confirm the attribute name
                subsystems = set(atom.molecule_atom.props.subsystem
                                 for atom in term.atoms)
                if len(subsystems) > 1:
                    raise ValueError('Bond crosses boundary: %s' % term)
                if subsystems == set(['mm']):
                    # keep the term itself (not the subsystem labels)
                    newterms.append(term)
            # Without this return every pruned list below would become None
            return newterms

        forcefield.bonds = prune(forcefield.bonds)
        forcefield.angles = prune(forcefield.angles)
        forcefield.dihedrals = prune(forcefield.dihedrals)
        forcefield.impropers = prune(forcefield.impropers)
示例#6
0
def test_initialization_charges():
    """Molecule charge comes from formal charges unless an explicit charge is passed."""
    anion = mdt.Atom('Na', formal_charge=-1)

    # Charge inferred from the atom's formal charge
    inferred = mdt.Molecule([anion])
    assert inferred.charge == -1 * u.q_e

    # it needs to be "formal_charge" to distinguish from partial charge
    with pytest.raises(TypeError):
        mdt.Atom('H', charge=3)

    # Explicit charge given as a bare number
    explicit = mdt.Molecule([anion], charge=-1)
    assert explicit.charge == -1 * u.q_e

    # Explicit charge given with units, disagreeing with the formal charges
    # TODO: test for warning
    explicit = mdt.Molecule([anion], charge=-3 * u.q_e)
    assert explicit.charge == -3 * u.q_e
示例#7
0
def restore_topology(mol, topo):
    """ Restores chain IDs and residue indices (these are stripped by some methods)

    The residues of ``mol`` are updated in place (``pdbindex``, ``name`` and
    ``chain``) from the matching residues of ``topo``; a new molecule is then built
    from ``mol.atoms``. A message is printed for every residue whose ``resname``
    differs from the reference.

    Args:
        mol (mdt.Molecule): molecule to restore topology to; must have a single chain
            and as many residues as ``topo``
        topo (mdt.Molecule): reference topology

    Returns:
        mdt.Molecule: molecule created from the atoms of ``mol`` after the update
    """
    import moldesign as mdt

    assert mol.num_residues == topo.num_residues
    assert mol.num_chains == 1

    # One fresh chain per reference chain, carrying the same name
    new_chains = {}
    for ref_chain in topo.chains:
        new_chains[ref_chain] = mdt.Chain(name=ref_chain.name)

    message = ('INFO: Residue #{res.index} residue code changed from "{refres.resname}"'
               ' to "{res.resname}".')
    for res, refres in zip(mol.residues, topo.residues):
        if res.resname != refres.resname:
            print(message.format(res=res, refres=refres))
        res.pdbindex = refres.pdbindex
        res.name = refres.name
        res.chain = new_chains[refres.chain]

    return mdt.Molecule(mol.atoms)
def split_chains(mol, distance_threshold=1.75*u.angstrom):
    """ Split a molecule's chains into unbroken biopolymers and groups of non-polymers

    This function is non-destructive - the passed molecule will not be modified
    (all work is done on ``mol.copy()``).

    Specifically, this function will:
       - Split any chain with non-contiguous biopolymeric pieces into single, contiguous polymers
       - Remove any solvent molecules from a chain into their own chain
       - Isolate ligands from each chain into their own chains

    Args:
        mol (mdt.Molecule): Input molecule
        distance_threshold (u.Scalar[length]): if not ``None``, the maximum distance between
           adjacent residues for which we consider them "contiguous". For PDB data, values greater
           than 1.4 Angstrom are eminently reasonable; the default threshold of 1.75 Angstrom is
           purposefully set to be extremely cautious (and still much lower than the distance to
           the *next* nearest neighbor, generally around 2.5 Angstrom)

    Returns:
        mdt.Molecule: molecule with separated chains

    Raises:
        AssertionError: if a residue that is neither 'unknown' nor water/solvent/ion has a
            different ``type`` than the first residue of its chain
    """

    tempmol = mol.copy()

    def bonded(r1, r2):
        # Two residues are contiguous if r2 is among r1's bonded residues and, when a
        # threshold is set, they are no further apart than that threshold
        if r2 not in r1.bonded_residues:
            return False
        if distance_threshold is not None and r1.distance(r2) > distance_threshold:
            return False
        return True

    def addto(chain, res):
        # Clear the residue's current chain before adding it to the new one
        res.chain = None
        chain.add(res)

    # Start with one output chain that reuses the name of the first input chain
    allchains = [mdt.Chain(tempmol.chains[0].name)]
    for chain in tempmol.chains:
        # The type of the chain's first residue is taken as the type of the whole chain
        chaintype = chain.residues[0].type
        solventchain = mdt.Chain(None)
        ligandchain = mdt.Chain(None)

        for ires, residue in enumerate(chain.residues):
            if residue.type == 'unknown':
                thischain = ligandchain
            elif residue.type in ('water', 'solvent', 'ion'):
                thischain = solventchain
            else:
                assert residue.type == chaintype
                # A break in the polymer (after the first residue) starts a new output chain
                # NOTE(review): for ires == 0 no new chain is started, so the first polymer
                # residue of every later input chain lands in whatever chain is currently
                # last in ``allchains`` - confirm this is intended
                if ires != 0 and not bonded(residue.prev_residue, residue):
                    allchains.append(mdt.Chain(None))
                thischain = allchains[-1]

            addto(thischain, residue)

        # Only keep the per-chain solvent/ligand chains if they received any atoms
        for c in (solventchain, ligandchain):
            if c.num_atoms > 0:
                allchains.append(c)

    return mdt.Molecule(allchains)
示例#9
0
def h2():
    """Build a two-hydrogen molecule named 'h2'.

    The atoms sit at x = +0.5 and x = -0.5 angstrom and are joined with
    ``bond_to(..., 1)`` after the molecule has been created.
    """
    first = mdt.Atom('H')
    first.x = 0.5 * u.angstrom

    second = mdt.Atom(atnum=1)
    second.position = [-0.5, 0.0, 0.0] * u.angstrom

    molecule = mdt.Molecule([first, second], name='h2')
    first.bond_to(second, 1)
    return molecule
示例#10
0
def test_charge_from_number(h2):
    """Charges given as bare numbers or with units should both be stored in units of q_e."""
    cation = mdt.Molecule(h2, charge=1)
    assert cation.charge == 1 * u.q_e

    # First reassign with explicit units, then with a bare number
    assignments = ((2 * u.q_e, 2 * u.q_e),
                   (3, 3 * u.q_e))
    for new_charge, expected in assignments:
        cation.charge = new_charge
        assert cation.charge == expected
def build_assembly(mol, assembly_name):
    """ Create biological assembly using a bioassembly specification.

    This routine builds a biomolecular assembly using the specification from a PDB header (if
    present, this data can be found in the  "REMARK 350" lines in the PDB file). Assemblies are
    author-assigned structures created by copying, translating, and rotating a subset of the
    chains in the PDB file.

    See Also:
        http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies

    Args:
        mol (moldesign.Molecule): Molecule with assembly data (read from
            ``mol.properties.bioassemblies``)
        assembly_name (str OR int): id of the biomolecular assembly to build. Integers are
            converted to strings before the lookup.

    Returns:
        mol (moldesign.Molecule): molecule containing the complete assembly

    Raises:
        AttributeError: If the molecule does not contain any biomolecular assembly data
        KeyError: If the specified assembly is not present
    """
    if isinstance(assembly_name, int):
        assembly_name = str(assembly_name)

    if 'bioassemblies' not in mol.properties:
        raise AttributeError(
            'This molecule does not contain any biomolecular assembly data')
    try:
        asm = mol.properties.bioassemblies[assembly_name]
    except KeyError:
        raise KeyError(
            ('The specified assembly name ("%s") was not found. The following '
             'assemblies are present: %s') %
            (assembly_name, ', '.join(mol.properties.bioassemblies.keys())))

    # Make sure each chain gets a unique name - up to all the letters in the alphabet, anyway
    used_chain_names = set()
    alpha = iter(string.ascii_uppercase)

    # Create the new molecule by copying, transforming, and renaming the original chains
    all_atoms = moldesign.molecules.atomcollections.AtomList()
    for t in asm.transforms:
        for chain_name in asm.chains:
            chain = mol.chains[chain_name].copy()
            chain.transform(t)

            # Walk through the alphabet until an unused name turns up. The ``next()``
            # builtin works on Python 2.6+ and 3, unlike the iterator's ``.next()`` method
            while chain.name in used_chain_names:
                chain.name = next(alpha)
            used_chain_names.add(chain.name)
            chain.pdbname = chain.pdbindex = chain.name
            all_atoms.extend(chain.atoms)
    newmol = mdt.Molecule(all_atoms,
                          name="%s (bioassembly %s)" %
                          (mol.name, assembly_name))
    return newmol
def test_solvate_protein_padding(pdb1yu8):
    """Adding water must add atoms while leaving the original residues unchanged."""
    solvated = mdt.add_water(pdb1yu8, padding=5.0 * u.angstrom)
    assert solvated.num_atoms > pdb1yu8.num_atoms

    # The leading residues of the solvated system should be the original molecule
    leading_residues = solvated.residues[:pdb1yu8.num_residues]
    recovered = mdt.Molecule(leading_residues)
    assert recovered.same_topology(pdb1yu8, verbose=True)

    original_xyz = pdb1yu8.positions.value_in(u.angstrom)
    recovered_xyz = recovered.positions.value_in(u.angstrom)
    np.testing.assert_allclose(original_xyz, recovered_xyz, atol=1e-3)
def linear():
    """Build a two-atom molecule lying on the x axis, named 'linear_h2'.

    Returns:
        tuple: the molecule, a dict with the keys 'C1' and 'Cinf_v' (both mapped to 1),
            and ``True`` (presumably a flag consumed by the calling test - confirm)
    """
    left = mdt.Atom(1)
    left.position = [-1, 0, 0] * u.angstrom

    right = mdt.Atom(2)
    right.position = [1, 0, 0] * u.angstrom

    molecule = mdt.Molecule([left, right], name='linear_h2')
    symmetry_counts = {'C1': 1, 'Cinf_v': 1}
    return molecule, symmetry_counts, True
 def get_input_file(self):
     """Serialize ``self.mol`` and report the format that was used.

     Molecules with at most 250 atoms are written as 'sdf', larger ones as 'pdb'.
     If ``self.mol`` has no ``write`` attribute it is first wrapped in an
     ``mdt.Molecule``.

     Returns:
         tuple: ``(written_string, format_name)``
     """
     fmt = 'sdf' if len(self.mol.atoms) <= 250 else 'pdb'

     if hasattr(self.mol, 'write'):
         writable = self.mol
     else:
         writable = mdt.Molecule(self.mol)

     return writable.write(format=fmt), fmt
 def __init__(self, mol, unit_system=None, first_frame=False):
     """Set up an empty trajectory for ``mol``.

     Args:
         mol: molecule to track; its ``atoms``, ``dynamic_dof`` and ``num_atoms``
             are read here
         unit_system: stored as ``self.unit_system``; ``mdt.units.default`` is handed
             to ``utils.if_not_none`` as the fallback
         first_frame (bool): if true, ``self.new_frame()`` is called once setup is done
     """
     self._init = True
     self.info = "Trajectory"
     self.frames = []
     self.mol = mol
     self.unit_system = utils.if_not_none(unit_system, mdt.units.default)
     self._property_keys = None
     # Scratch molecule built from copies of the tracked molecule's atoms
     self._tempmol = mdt.Molecule(self.mol.atoms, copy_atoms=True)
     self._tempmol.dynamic_dof = self.mol.dynamic_dof
     self._viz = None
     # ``range`` exists on both Python 2 and 3 (``xrange`` is Python 2 only)
     self.atoms = [_TrajAtom(self, i) for i in range(self.mol.num_atoms)]
     if first_frame:
         self.new_frame()
def planar():
    """Build a three-atom molecule with all atoms at z = 0, named 'planar_h3'.

    Returns:
        tuple: the molecule, a dict with the keys 'C1' and 'Cs' (both mapped to 1),
            and ``True`` (presumably a flag consumed by the calling test - confirm)
    """
    coordinates = ([-1, 0, 0],
                   [0.9, 0.2, 0],
                   [-0.9, 3.2, 0])

    atoms = []
    for number, xyz in enumerate(coordinates, 1):
        atom = mdt.Atom(number)
        atom.position = xyz * u.angstrom
        atoms.append(atom)

    symmetry_counts = {'C1': 1, 'Cs': 1}
    return mdt.Molecule(atoms, name='planar_h3'), symmetry_counts, True
def test_set_hybridization_and_saturate():
    """Start from two bonded carbons and expect the routine to complete ethylene.

    The test is marked as an expected failure right after the call, so the
    assertions below the ``xfail`` are currently not reached.
    """
    # Creates just the carbons of ethylene, expects the routine to figure out the rest
    first_carbon = mdt.Atom(6)
    second_carbon = mdt.Atom(6)
    second_carbon.x = 1.35 * u.angstrom
    first_carbon.bond_to(second_carbon, 1)

    skeleton = mdt.Molecule([first_carbon, second_carbon])
    saturated = mdt.set_hybridization_and_saturate(skeleton)
    pytest.xfail('This is apparently broken')

    assert saturated.num_atoms == 6
    assert saturated.atoms[0].bond_graph[first_carbon] == 2
    hydrogens = saturated.get_atoms(atnum=1)
    assert len(hydrogens) == 4
示例#18
0
def test_copy_breaks_link(h2):
    """Changes to a molecule must not leak into its copy, and vice versa."""
    duplicate = mdt.Molecule(h2)

    # Move an atom of the original: the copy keeps its old coordinate
    h2.atoms[0].y = 4.0 * u.bohr
    assert duplicate.atoms[0].y == 0.0 * u.angstrom
    moved_y = h2.positions[0, 1].value_in(u.bohr)
    np.testing.assert_almost_equal(moved_y, 4.0, 7)
    assert duplicate.positions[0, 1] == 0.0 * u.bohr

    # Give the copy some momentum: the original stays at rest
    duplicate.momenta[1, 0] = 2.0 * u.default.momentum
    copied_px = duplicate.atoms[1].px.value_in(u.default.momentum)
    np.testing.assert_almost_equal(copied_px, 2.0, 7)
    at_rest = 0.0 * u.default.momentum
    assert h2.momenta[1, 0] == at_rest
    assert h2.atoms[1].px == at_rest
示例#19
0
    def combine(self, *others):
        """ Create a new molecule from this structure plus a group of other AtomContainers

        Notes:
            - Chains without a ``pdbindex``, or whose ``pdbindex`` is already taken, get the
              first still-unused uppercase letter as both ``pdbindex`` and ``name``
            - Residue resnames are not changed.
            - The atoms of every input are copied before any chain is renamed

        Args:
            *others (AtomContainer or AtomList or List[moldesign.Atom]): objects to merge
                with this one

        Returns:
            mdt.Molecule: a new Molecule that's the union of this structure with all
              others. Its charge is the sum of the ``charge`` attributes of the inputs
              (inputs without one count as zero).
        """
        # Uppercase letters that are still free, in alphabetical order
        free_names = collections.OrderedDict(
            (letter, None) for letter in string.ascii_uppercase)
        used_names = set()
        handled_chains = set()

        merged_atoms = []
        total_charge = 0
        descriptions = []

        for source in itertools.chain([self], others):
            source_atoms = mdt.helpers.get_all_atoms(source).copy()

            for atom in source_atoms:
                chain = atom.chain
                if chain in handled_chains:
                    continue
                handled_chains.add(chain)

                needs_new_name = chain.pdbindex is None or chain.pdbindex in used_names
                if needs_new_name:
                    chain.pdbindex = chain.name = next(iter(free_names.keys()))
                free_names.pop(chain.pdbindex, None)
                used_names.add(chain.pdbindex)

            merged_atoms.extend(source_atoms)
            total_charge += getattr(source, 'charge', 0 * u.q_e)

            # Describe where these atoms came from
            if hasattr(source, 'name'):
                descriptions.append(source.name)
            elif source_atoms[0].molecule is not None:
                descriptions.append('%d atoms from %s' %
                                    (len(source_atoms), source_atoms[0].molecule.name))
            else:
                descriptions.append('list of %d unowned atoms' % len(source_atoms))

        num_added = len(merged_atoms) - self.num_atoms
        description = 'Union of %s' % ', '.join(descriptions)
        return mdt.Molecule(merged_atoms,
                            copy_atoms=True,
                            charge=total_charge,
                            name='%s extended with %d atoms' % (self.name, num_added),
                            metadata=utils.DotDict(description=description))
 def __init__(self, mol, unit_system=None, first_frame=False, name=None):
     """Set up an empty trajectory for ``mol``.

     Args:
         mol: molecule to track; its ``atoms`` and ``dynamic_dof`` are read here
         unit_system: stored as ``self.unit_system``; ``mdt.units.default`` is handed
             to ``utils.if_not_none`` as the fallback
         first_frame (bool): if true, ``self.new_frame()`` is called once setup is done
         name: stored as ``self.name``; ``'untitled'`` is handed to
             ``utils.if_not_none`` as the fallback
     """
     # NOTE(review): presumably a flag for attribute-access hooks on this class - confirm
     self._init = True
     self.info = "Trajectory"
     self.frames = []
     self.mol = mol
     self.unit_system = utils.if_not_none(unit_system, mdt.units.default)
     self.properties = utils.DotDict()
     # Scratch molecule built from copies of the tracked molecule's atoms
     self._tempmol = mdt.Molecule(self.mol.atoms, copy_atoms=True)
     self._tempmol.dynamic_dof = self.mol.dynamic_dof
     self._viz = None
     self._atoms = None
     self.name = utils.if_not_none(name, 'untitled')
     if first_frame: self.new_frame()
示例#21
0
def test_copying_doesnt_corrupt_original_h2_harmonic(h2_harmonic):
    """Copying a molecule must leave the original's models and topology untouched."""
    mol = h2_harmonic

    # Snapshot the pieces of the original that a copy could disturb
    integrator_before, model_before = mol.integrator, mol.energy_model
    residue_before = mol.residues[0]
    chain_before = list(mol.chains)[0]

    duplicate = mdt.Molecule(mol)  # only the act of copying matters here

    assert mol.integrator is integrator_before
    assert mol.energy_model is model_before
    assert len(mol.chains) == 1
    assert len(mol.residues) == 1
    assert residue_before == mol.residues[0]
    assert chain_before == list(mol.chains)[0]
示例#22
0
    def _build_qm_system(self):
        """Create the QM copy of the molecule, with the MM atoms passed in as point charges.

        Builds ``self.qmmol`` from ``self.mol`` and cross-links each original atom with
        its QM copy (``subatom.molecule_atom`` / ``atom.props.qm_child``). The point
        charges are stored on ``self.qm_model.params`` before the model is assigned, and
        ``self._subatoms`` is set to the QM atoms followed by the MM atoms.

        This reads ``props.mm_child`` and ``self.mmmol``, so the MM system has to
        exist already (presumably created by ``_build_mm_system`` - confirm call order).
        """
        # Set up the QM system
        self.qmmol = mdt.Molecule(self.mol)
        for atom, subatom in zip(self.mol.atoms, self.qmmol.atoms):
            subatom.molecule_atom = atom
            atom.props.qm_child = subatom

        # Make the MM atoms point charges
        # NOTE(review): the subsystem label is read from ``params.subsystem`` here but from
        # ``props.subsystem`` in the MM setup - confirm which attribute is correct
        point_charges = {atom: atom.props.mm_child.ff.charge for atom in self.qmmol.atoms
                         if atom.molecule_atom.params.subsystem == 'mm'}
        self.qm_model.params.point_charges = point_charges
        self.qmmol.set_energy_model(self.qm_model)
        # The MM molecule is stored as ``self.mmmol`` (three m's), not ``self.mmol``
        self._subatoms = self.qmmol.atoms + self.mmmol.atoms
    def _setup_qm_subsystem(self):
        """Build the QM subsystem as a full copy of the molecule.

        The force field of ``self.mol`` is copied onto the new molecule, the QM energy
        model from ``self.params`` is assigned, and the QM atom indices are forwarded
        to that model's parameters.

        Returns:
            the new QM molecule

        Raises:
            ValueError: if any link atoms would be needed, since this model does not
                support them
        """
        qm_system = mdt.Molecule(self.mol)
        self.mol.ff.copy_to(qm_system)

        self.qm_link_atoms = mdt.helpers.qmmm.create_link_atoms(self.mol,
                                                                self.qm_atoms)
        if self.qm_link_atoms:
            model_name = self.__class__.__name__
            raise ValueError('The %s model does not support link atoms' % model_name)

        qm_system.set_energy_model(self.params.qm_model)
        model_params = qm_system.energy_model.params
        model_params.qm_atom_indices = self.params.qm_atom_indices
        return qm_system
示例#24
0
def test_numeric_residue_name_1PYN(request, mol):
    """ The ligand in this residue is named "941", which causes a little trickiness

    The ligand is parameterized, its library file is written to disk, and the file
    is read back with parmed to check that it holds exactly one entry called "941".
    """
    import parmed

    library_path = '/tmp/tmp.lib'

    mol = request.getfixturevalue(mol)
    ligand = mdt.Molecule(mol.residues[283])

    params = mdt.create_ff_parameters(ligand, charges='gasteiger')
    params._file_list['mol.lib'].put(library_path)

    contents = parmed.load_file(library_path)
    residue_names = list(contents.keys())
    assert len(contents) == 1
    assert residue_names[0] == '941'
示例#25
0
def test_numeric_residue_name_1PYN():
    """ The ligand in this residue is named "941", which causes a little trickiness

    The ligand is parameterized, its library file is written to disk, and the file
    is read back with parmed to check that it holds exactly one entry called "941".
    """
    import parmed

    mol = mdt.read(get_data_path('1pyn.pdb'))
    ligand = mdt.Molecule(mol.residues[283])

    params = mdt.parameterize(ligand, charges='gasteiger')
    params.lib.put('/tmp/tmp.lib')

    contents = parmed.load_file('/tmp/tmp.lib')
    assert len(contents) == 1
    # Key views cannot be indexed on Python 3, so turn them into a list first
    assert list(contents.keys())[0] == '941'
示例#26
0
    def finish_job(job):
        """Turn the job's 'helix.pdb' output into a named molecule.

        If the file was read as a single chain, the first chain is renamed 'A' and the
        second half of the residues is moved to a new chain 'B' before the molecule is
        rebuilt. Biopolymer bonds are assigned in either case.
        """
        helix_file = job.get_output('helix.pdb').open()
        mol = mdt.fileio.read_pdb(helix_file, assign_ccd_bonds=False)

        if mol.num_chains == 1:
            assert mol.num_residues % 2 == 0
            first_chain = mol.chains[0]
            first_chain.name = first_chain.pdbindex = first_chain.pdbname = 'A'

            second_chain = mdt.Chain('B')
            halfway = mol.num_residues // 2
            for residue in mol.residues[halfway:]:
                residue.chain = second_chain
            mol = mdt.Molecule(mol)

        mdt.helpers.assign_biopolymer_bonds(mol)

        mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
        return mol
 def _setup_qm_subsystem(self):
     """ QM subsystem for mechanical embedding is the QM atoms + any link atoms

     Returns:
         the new QM molecule, named after ``self.mol`` and carrying the QM energy
         model from ``self.params``
     """
     qm_atoms = []
     for iatom in self.params.qm_atom_indices:
         qm_atoms.append(self.mol.atoms[iatom])

     self.qm_link_atoms = mdt.helpers.qmmm.create_link_atoms(self.mol, qm_atoms)

     subsystem_name = '%s QM subsystem' % self.mol.name
     qm_system = mdt.Molecule(qm_atoms + self.qm_link_atoms, name=subsystem_name)

     # Remember which real atom each QM atom stands for.
     # NOTE(review): this pairs with the ``self.qm_atoms`` attribute rather than the
     # local ``qm_atoms`` list - presumably equivalent, confirm
     for real_atom, qm_atom in zip(self.qm_atoms, qm_system.atoms):
         qm_atom.metadata.real_atom = real_atom

     qm_system.set_energy_model(self.params.qm_model)
     return qm_system
def test_mechanical_embedding_wfn(h2_h2_mechanical_embedding_rhf):
    """The QM part of the embedded calculation should match a standalone RHF run.

    The first residue is recomputed on its own with RHF/sto-3g and compared with the
    QM properties of the embedded model; the QM and MM energies must also add up to
    the total potential energy.
    """
    embedded = h2_h2_mechanical_embedding_rhf
    embedded.calculate()
    qmprops = embedded.properties.qmprops
    mmprops = embedded.properties.mmprops

    # Standalone reference calculation
    reference = mdt.Molecule(embedded.residues[0])
    reference.set_energy_model(mdt.models.RHF, basis='sto-3g')
    reference.calculate()

    energy_gap = abs(reference.potential_energy - qmprops.potential_energy)
    assert energy_gap < 1e-8 * u.hartree
    helpers.assert_almost_equal(reference.wfn.fock_ao, qmprops.wfn.fock_ao)
    assert qmprops.potential_energy + mmprops.potential_energy == embedded.potential_energy
示例#29
0
def _prep_for_tleap(mol):
    """ Returns a modified *copy* that's been modified for input to tleap

    Makes the following modifications:
       1. Reassigns all residue IDs (``pdbindex`` becomes ``index + 1``)
       2. Assigns tleap-appropriate cysteine resnames ('CYM' for a sulfur with formal
          charge -1, 'CYX' for a sulfur bonded to the 'SG' atom of another CYS/CYX residue)

    Args:
        mol: molecule whose ``atoms`` are used to build the new molecule

    Returns:
        the new molecule, built with ``mdt.Molecule(mol.atoms)``

    Raises:
        ValueError: if a cysteine sulfur has a nonzero formal charge other than -1, a bond
            count other than 1 or 2, or a bond to another residue that is not an 'SG' atom
            of a CYS/CYX residue
    """
    # NOTE(review): ``change`` is assigned below but never read - confirm whether it was
    # meant to guard the rebuild
    change = False
    clean = mdt.Molecule(mol.atoms)
    for residue in clean.residues:
        residue.pdbindex = residue.index + 1

        if residue.resname == 'CYS':  # deal with cysteine states
            # Skip residues with no SG atom, and residues that have an HG atom
            if 'SG' not in residue.atoms or 'HG' in residue.atoms:
                continue  # sulfur's missing, we'll let tleap create it
            else:
                sulfur = residue.atoms['SG']

            if sulfur.formal_charge == -1 * u.q_e:
                residue.resname = 'CYM'
                change = True
                continue

            # check for a reasonable hybridization state
            if sulfur.formal_charge != 0 or sulfur.num_bonds not in (1, 2):
                raise ValueError("Unknown sulfur hybridization state for %s" %
                                 sulfur)

            # check for a disulfide bond
            for otheratom in sulfur.bonded_atoms:
                # Only bonds that leave this residue are of interest
                if otheratom.residue is not residue:
                    if otheratom.name != 'SG' or otheratom.residue.resname not in (
                            'CYS', 'CYX'):
                        raise ValueError(
                            'Unknown bond from cysteine sulfur (%s)' % sulfur)

                    # if we're here, this is a cystine with a disulfide bond
                    print(
                        'INFO: disulfide bond detected. Renaming %s from CYS to CYX'
                        % residue)
                    sulfur.residue.resname = 'CYX'

            # NOTE(review): this runs inside the residue loop, once for every CYS residue
            # that reaches this point (CYM residues ``continue`` before it) - confirm
            clean._rebuild_from_atoms()

    return clean
def parmed_to_mdt(pmdmol):
    """ Convert parmed Structure to MDT Structure

    Chains and residues are created the first time an atom belonging to them is
    encountered; bonds are copied over once all atoms exist.

    Args:
        pmdmol (parmed.Structure): parmed structure to convert

    Returns:
        mdt.Molecule: converted molecule
    """
    masses = [pa.mass for pa in pmdmol.atoms] * u.dalton
    positions = [[pa.xx, pa.xy, pa.xz] for pa in pmdmol.atoms] * u.angstrom

    # Lookup tables from parmed objects to their MDT counterparts
    chain_lookup = {}
    residue_lookup = {}
    atom_lookup = collections.OrderedDict()

    for index, parmed_atom in enumerate(pmdmol.atoms):
        parmed_residue = parmed_atom.residue
        chain_key = parmed_residue.chain

        if chain_key not in chain_lookup:
            chain_lookup[chain_key] = mdt.Chain(pdbname=chain_key)
        chain = chain_lookup[chain_key]

        if parmed_residue not in residue_lookup:
            new_residue = mdt.Residue(resname=parmed_residue.name,
                                      pdbindex=parmed_residue.number)
            residue_lookup[parmed_residue] = new_residue
            new_residue.chain = chain
            chain.add(new_residue)
        residue = residue_lookup[parmed_residue]

        new_atom = mdt.Atom(name=parmed_atom.name,
                            atnum=parmed_atom.atomic_number,
                            pdbindex=parmed_atom.number,
                            mass=masses[index])
        new_atom.position = positions[index]

        new_atom.residue = residue
        residue.add(new_atom)
        assert parmed_atom not in atom_lookup
        atom_lookup[parmed_atom] = new_atom

    # Recreate the bonds between the converted atoms
    for parmed_bond in pmdmol.bonds:
        first = atom_lookup[parmed_bond.atom1]
        second = atom_lookup[parmed_bond.atom2]
        first.bond_to(second, int(parmed_bond.order))

    converted_atoms = list(atom_lookup.values())
    return mdt.Molecule(converted_atoms, metadata=_get_pdb_metadata(pmdmol))