def test_ff_assignment_doesnt_change_topology(pdb3aid):
    """Forcefield preparation must keep chains, residues and atom names intact."""
    source = pdb3aid
    protein = mdt.Molecule(source.get_atoms('protein'))
    ligand = mdt.Molecule(source.get_atoms('unknown'))
    ligff = mdt.create_ff_parameters(ligand, charges='gasteiger')

    mdt.guess_histidine_states(protein)
    mol = protein.combine(ligand)

    ff = mdt.forcefields.DefaultAmber()
    ff.add_ff(ligff)
    mdready = ff.create_prepped_molecule(mol)

    # Global counts
    assert mdready.num_residues == mol.num_residues
    assert mdready.num_chains == mol.num_chains

    # Chain-by-chain comparison
    for newchain, oldchain in zip(mdready.chains, mol.chains):
        assert newchain.name == oldchain.name
        assert newchain.num_residues == oldchain.num_residues
        assert newchain.index == oldchain.index

    # Residue-by-residue comparison
    for newr, oldr in zip(mdready.residues, mol.residues):
        assert newr.index == oldr.index

        if newr.resname != 'HIS':
            assert newr.resname == oldr.resname
        else:
            # a prepared 'HIS' may come from any of these original codes
            assert oldr.resname in 'HIS HID HIE HIP'.split()

        assert newr.pdbindex == oldr.pdbindex
        assert newr.chain.index == oldr.chain.index

        # every original atom name must still be present in the new residue
        for atom in oldr:
            assert atom.name in newr
def test_chain_rename(pdb3aid):
    """Two single-residue molecules combined should end up in chains 'A' and 'B'."""
    res1 = mdt.Molecule(pdb3aid.residues[3])
    res2 = mdt.Molecule(pdb3aid.residues[4])
    newmol = mdt.Molecule([res1, res2])

    assert newmol.num_chains == 2
    assert newmol.num_residues == 2

    # residue names are carried over from the source molecules, in order
    for position, piece in enumerate((res1, res2)):
        assert newmol.residues[position].name == piece.residues[0].name

    # chains are named in order
    for position, expected_name in enumerate(('A', 'B')):
        assert newmol.chains[position].name == expected_name
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Note:
        No bonds are assigned here; the returned molecule is built from bare atoms
        (see the TODO below).

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []

    for chain in struc.get_chains():
        # the full id is unpacked as a 3-tuple; only the last element (chain id) is used
        _first, _second, pdbid = chain.get_full_id()
        newchain = mdt.Chain(pdbname=pdbid.strip())

        for residue in chain.get_residues():
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])
            newchain.add(newresidue)

            # Iterate the residue directly rather than calling ``residue.get_atom()``:
            # that accessor is deprecated/removed in newer Biopython releases, while
            # iterating the entity yields the same child atoms in every version.
            for atom in residue:
                newatom = mdt.Atom(element=atom.element,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)
                newatoms.append(newatom)

    return mdt.Molecule(newatoms, name=struc.get_full_id()[0])
def combine_molecules(mdtfile1, mdtfile2):
    """Read two molecule files, merge their atoms, and write the result to 'out.pkl'.

    Args:
        mdtfile1: first input, passed to ``mdt.read``
        mdtfile2: second input, passed to ``mdt.read``
    """
    import moldesign as mdt

    # read in argument order, then concatenate the atom lists
    first, second = [mdt.read(path) for path in (mdtfile1, mdtfile2)]
    combined = mdt.Molecule(first.atoms + second.atoms)
    combined.write('out.pkl')
def _build_mm_system(self):
    """Set up the MM subsystem.

    The MM molecule (``self.mmmol``) is built from all atoms of ``self.mol``. Each
    original atom and its MM counterpart are cross-linked
    (``subatom.molecule_atom`` / ``atom.props.mm_child``). The bonded forcefield terms
    (bonds, angles, dihedrals, impropers) are then pruned so that only terms lying
    entirely in the 'mm' subsystem remain.

    Raises:
        ValueError: if a bonded term involves atoms from more than one subsystem
    """
    # Set up the MM subsystem - includes all atoms,
    # but we remove all FF terms except LJ
    self.mmmol = mdt.Molecule(self.mol)
    for atom, subatom in zip(self.mol.atoms, self.mmmol.atoms):
        subatom.molecule_atom = atom
        atom.props.mm_child = subatom
    self.mmmol.set_energy_model(self.mm_model)

    # Remove charges and bonds for QM atoms
    ff = self.mm_model.params.ff
    forcefield = ff.get_parameters(self.mmmol)

    def prune(termlist):
        """Return only the terms whose atoms all belong to the 'mm' subsystem."""
        # remove intra-qm region forces
        # TODO: what about LJ forces???
        newterms = []
        for term in termlist:
            # NOTE(review): the original read ``termlist.atom`` here, which cannot be
            # right for a per-term check; ``term.atoms`` is assumed to be the term's
            # atom collection - confirm against the forcefield term class.
            subsystems = set(atom.molecule_atom.props.subsystem
                             for atom in term.atoms)
            if subsystems == {'mm'}:
                newterms.append(term)  # keep the term itself, not its subsystem set
            elif len(subsystems) > 1:
                raise ValueError('Bond crosses boundary: %s' % term)
            # terms lying entirely in a non-'mm' subsystem are dropped
        return newterms

    forcefield.bonds = prune(forcefield.bonds)
    forcefield.angles = prune(forcefield.angles)
    forcefield.dihedrals = prune(forcefield.dihedrals)
    forcefield.impropers = prune(forcefield.impropers)
def test_initialization_charges():
    """Molecule charge comes from formal charges or from an explicit ``charge`` argument."""
    sodium = mdt.Atom('Na', formal_charge=-1)
    from_formal_charge = mdt.Molecule([sodium])
    assert from_formal_charge.charge == -1 * u.q_e

    # it needs to be "formal_charge" to distinguish from partial charge
    with pytest.raises(TypeError):
        mdt.Atom('H', charge=3)

    # explicit charge given as a bare number
    explicit = mdt.Molecule([sodium], charge=-1)
    assert explicit.charge == -1 * u.q_e

    # explicit charge given with units, disagreeing with the formal charge
    explicit = mdt.Molecule([sodium], charge=-3 * u.q_e)  # TODO: test for warning
    assert explicit.charge == -3 * u.q_e
def restore_topology(mol, topo):
    """ Restores chain IDs and residue indices (these are stripped by some methods)

    Residues of ``mol`` are paired in order with residues of ``topo``; each residue of
    ``mol`` receives the reference ``pdbindex``, ``name`` and a chain named after the
    reference chain. Note that these attributes are assigned on ``mol``'s own residues
    before the new molecule is built from ``mol.atoms``.

    Args:
        mol (mdt.Molecule): molecule to restore topology to
        topo (mdt.Molecule): reference topology

    Returns:
        mdt.Molecule: a molecule built from ``mol.atoms`` with the restored topology
    """
    import moldesign as mdt

    assert mol.num_residues == topo.num_residues
    assert mol.num_chains == 1

    # one fresh chain per reference chain, carrying the same name
    chain_map = {chain: mdt.Chain(name=chain.name) for chain in topo.chains}

    changed_msg = ('INFO: Residue #{res.index} residue code changed from "{refres.resname}"'
                   ' to "{res.resname}".')

    for res, refres in zip(mol.residues, topo.residues):
        if refres.resname != res.resname:
            print(changed_msg.format(res=res, refres=refres))
        res.pdbindex = refres.pdbindex
        res.name = refres.name
        res.chain = chain_map[refres.chain]

    return mdt.Molecule(mol.atoms)
def split_chains(mol, distance_threshold=1.75*u.angstrom):
    """ Split a molecule's chains into unbroken biopolymers and groups of non-polymers

    This function is non-destructive - the passed molecule will not be modified
    (all work is done on ``mol.copy()``).

    Specifically, this function will:
       - Split any chain with non-contiguous biopolymeric pieces into single, contiguous polymers
       - Remove any solvent molecules from a chain into their own chain
       - Isolate ligands from each chain into their own chains

    Args:
        mol (mdt.Molecule): Input molecule
        distance_threshold (u.Scalar[length]): if not ``None``, the maximum distance between
           adjacent residues for which we consider them "contiguous". For PDB data, values
           greater than 1.4 Angstrom are eminently reasonable; the default threshold of
           1.75 Angstrom is purposefully set to be extremely cautious (and still much lower
           than the distance to the *next* nearest neighbor, generally around 2.5 Angstrom)

    Returns:
        mdt.Molecule: molecule with separated chains

    Raises:
        AssertionError: if a residue that is not 'unknown', 'water', 'solvent' or 'ion'
           has a type different from the first residue of its chain
    """
    tempmol = mol.copy()

    def bonded(r1, r2):
        # Two residues count as contiguous only if they are bonded AND (when a threshold
        # is given) no further apart than ``distance_threshold``
        if r2 not in r1.bonded_residues:
            return False
        if distance_threshold is not None and r1.distance(r2) > distance_threshold:
            return False
        return True

    def addto(chain, res):
        # detach the residue from its current chain before adding it to the new one
        res.chain = None
        chain.add(res)

    # The first output chain reuses the name of the first input chain; all others are unnamed
    allchains = [mdt.Chain(tempmol.chains[0].name)]
    for chain in tempmol.chains:
        # the type of the first residue defines the polymer type expected for this chain
        chaintype = chain.residues[0].type
        solventchain = mdt.Chain(None)
        ligandchain = mdt.Chain(None)

        for ires, residue in enumerate(chain.residues):
            if residue.type == 'unknown':
                thischain = ligandchain
            elif residue.type in ('water', 'solvent', 'ion'):
                thischain = solventchain
            else:
                assert residue.type == chaintype
                # a break in the polymer starts a new output chain
                if ires != 0 and not bonded(residue.prev_residue, residue):
                    allchains.append(mdt.Chain(None))
                # NOTE(review): for input chains after the first, ``allchains[-1]`` at
                # ``ires == 0`` is whatever was appended last (possibly the previous
                # chain's solvent/ligand chain or its last polymer piece) - confirm
                # this is intended.
                thischain = allchains[-1]

            addto(thischain, residue)

        # only keep the per-chain solvent / ligand chains if they received any atoms
        for c in (solventchain, ligandchain):
            if c.num_atoms > 0:
                allchains.append(c)

    return mdt.Molecule(allchains)
def h2():
    """Build a two-hydrogen molecule named 'h2', 1 Angstrom apart on x, with a single bond."""
    first = mdt.Atom('H')
    first.x = 0.5 * u.angstrom

    second = mdt.Atom(atnum=1)
    second.position = [-0.5, 0.0, 0.0] * u.angstrom

    molecule = mdt.Molecule([first, second], name='h2')
    # the bond is created after the atoms have been placed in the molecule
    first.bond_to(second, 1)
    return molecule
def test_charge_from_number(h2):
    """Charges given as bare numbers or as quantities are both reported in ``u.q_e``."""
    h2plus = mdt.Molecule(h2, charge=1)
    assert h2plus.charge == 1 * u.q_e

    # (value assigned, value expected back)
    cases = ((2 * u.q_e, 2 * u.q_e),
             (3, 3 * u.q_e))
    for assigned, expected in cases:
        h2plus.charge = assigned
        assert h2plus.charge == expected
def build_assembly(mol, assembly_name):
    """ Create biological assembly using a bioassembly specification.

    This routine builds a biomolecular assembly using the specification from a PDB header (if
    present, this data can be found in the "REMARK 350" lines in the PDB file). Assemblies are
    author-assigned structures created by copying, translating, and rotating a subset of the
    chains in the PDB file.

    See Also:
        http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies

    Args:
        mol (moldesign.Molecule): Molecule with assembly data (read here from
            ``mol.properties.bioassemblies``)
        assembly_name (str OR int): id of the biomolecular assembly to build. Integers
            are converted to strings before lookup.

    Returns:
        mol (moldesign.Molecule): molecule containing the complete assembly

    Raises:
        AttributeError: If the molecule does not contain any biomolecular assembly data
        KeyError: If the specified assembly is not present
    """
    if isinstance(assembly_name, int):
        assembly_name = str(assembly_name)

    if 'bioassemblies' not in mol.properties:
        raise AttributeError('This molecule does not contain any biomolecular assembly data')
    try:
        asm = mol.properties.bioassemblies[assembly_name]
    except KeyError:
        raise KeyError(('The specified assembly name ("%s") was not found. The following '
                        'assemblies are present: %s') %
                       (assembly_name,
                        ', '.join(mol.properties.bioassemblies.keys())))

    # Make sure each chain gets a unique name - up to all the letters in the alphabet, anyway
    used_chain_names = set()
    alpha = iter(string.ascii_uppercase)

    # Create the new molecule by copying, transforming, and renaming the original chains
    all_atoms = moldesign.molecules.atomcollections.AtomList()
    for t in asm.transforms:
        for chain_name in asm.chains:
            chain = mol.chains[chain_name].copy()
            chain.transform(t)

            # ``next(alpha)`` works on both Python 2 and 3 (``alpha.next()`` is py2-only)
            while chain.name in used_chain_names:
                chain.name = next(alpha)
            used_chain_names.add(chain.name)
            chain.pdbname = chain.pdbindex = chain.name
            all_atoms.extend(chain.atoms)

    newmol = mdt.Molecule(all_atoms,
                          name="%s (bioassembly %s)" % (mol.name, assembly_name))
    return newmol
def test_solvate_protein_padding(pdb1yu8):
    """Adding water must leave the solute's topology and coordinates unchanged."""
    solvated = mdt.add_water(pdb1yu8, padding=5.0 * u.angstrom)
    assert solvated.num_atoms > pdb1yu8.num_atoms

    # the leading residues of the solvated system are compared to the original solute
    solute = mdt.Molecule(solvated.residues[:pdb1yu8.num_residues])
    assert solute.same_topology(pdb1yu8, verbose=True)

    reference_xyz = pdb1yu8.positions.value_in(u.angstrom)
    solute_xyz = solute.positions.value_in(u.angstrom)
    np.testing.assert_allclose(reference_xyz, solute_xyz, atol=1e-3)
def linear():
    """Return a two-atom molecule on the x axis, its expected symmetry table, and ``True``.

    Returns:
        tuple: ``(molecule named 'linear_h2', {'C1': 1, 'Cinf_v': 1}, True)``
    """
    atoms = []
    # (atomic number, x coordinate in Angstrom)
    for atnum, xcoord in ((1, -1), (2, 1)):
        atom = mdt.Atom(atnum)
        atom.position = [xcoord, 0, 0] * u.angstrom
        atoms.append(atom)

    expected = {'C1': 1, 'Cinf_v': 1}
    return mdt.Molecule(atoms, name='linear_h2'), expected, True
def get_input_file(self):
    """Serialize ``self.mol`` to a string, choosing the format by atom count.

    Up to 250 atoms are written as 'sdf'; anything larger as 'pdb'. If ``self.mol`` has
    no ``write`` attribute, it is first wrapped in an ``mdt.Molecule``.

    Returns:
        tuple: ``(file contents, format name)``
    """
    fmt = 'sdf' if len(self.mol.atoms) <= 250 else 'pdb'
    writemol = self.mol if hasattr(self.mol, 'write') else mdt.Molecule(self.mol)
    return writemol.write(format=fmt), fmt
def __init__(self, mol, unit_system=None, first_frame=False):
    """Create an empty trajectory bound to ``mol``.

    Args:
        mol: the molecule this trajectory refers to; its ``atoms``, ``dynamic_dof`` and
            ``num_atoms`` are read here
        unit_system: unit system to use; ``mdt.units.default`` is substituted via
            ``utils.if_not_none`` when this is ``None``
        first_frame (bool): if True, ``self.new_frame()`` is called once at the end
    """
    # NOTE(review): presumably a flag consulted by attribute hooks elsewhere in the
    # class - confirm before reordering any of these assignments
    self._init = True
    self.info = "Trajectory"
    self.frames = []
    self.mol = mol
    self.unit_system = utils.if_not_none(unit_system, mdt.units.default)
    self._property_keys = None
    # scratch molecule built from copies of the atoms (``copy_atoms=True``)
    self._tempmol = mdt.Molecule(self.mol.atoms, copy_atoms=True)
    self._tempmol.dynamic_dof = self.mol.dynamic_dof
    self._viz = None
    # one per-atom trajectory accessor for each atom index
    self.atoms = [_TrajAtom(self, i) for i in xrange(self.mol.num_atoms)]
    if first_frame:
        self.new_frame()
def planar():
    """Return a three-atom molecule in the z=0 plane, its expected symmetry table, and ``True``.

    Returns:
        tuple: ``(molecule named 'planar_h3', {'C1': 1, 'Cs': 1}, True)``
    """
    atoms = []
    # (atomic number, position in Angstrom)
    layout = ((1, [-1, 0, 0]),
              (2, [0.9, 0.2, 0]),
              (3, [-0.9, 3.2, 0]))
    for atnum, xyz in layout:
        atom = mdt.Atom(atnum)
        atom.position = xyz * u.angstrom
        atoms.append(atom)

    expected = {'C1': 1, 'Cs': 1}
    return mdt.Molecule(atoms, name='planar_h3'), expected, True
def test_set_hybridization_and_saturate():
    """Saturating a bare C-C skeleton at 1.35 Angstrom should yield ethylene (xfail)."""
    # Creates just the carbons of ethylene, expects the routine to figure out the rest
    atom1 = mdt.Atom(6)
    atom2 = mdt.Atom(6)
    atom2.x = 1.35 * u.angstrom
    atom1.bond_to(atom2, 1)

    skeleton = mdt.Molecule([atom1, atom2])
    saturated = mdt.set_hybridization_and_saturate(skeleton)

    # xfail is raised here, after the call above; the assertions below do not run
    pytest.xfail('This is apparently broken')

    assert saturated.num_atoms == 6
    assert saturated.atoms[0].bond_graph[atom1] == 2
    assert len(saturated.get_atoms(atnum=1)) == 4
def test_copy_breaks_link(h2):
    """Changes to a molecule must not leak into its copy, and vice versa."""
    clone = mdt.Molecule(h2)

    # move an atom of the original: only the original changes
    h2.atoms[0].y = 4.0 * u.bohr
    assert clone.atoms[0].y == 0.0 * u.angstrom
    moved_y = h2.positions[0, 1].value_in(u.bohr)
    np.testing.assert_almost_equal(moved_y, 4.0, 7)
    assert clone.positions[0, 1] == 0.0 * u.bohr

    # change a momentum of the copy: only the copy changes
    clone.momenta[1, 0] = 2.0 * u.default.momentum
    clone_px = clone.atoms[1].px.value_in(u.default.momentum)
    np.testing.assert_almost_equal(clone_px, 2.0, 7)
    assert h2.momenta[1, 0] == 0.0 * u.default.momentum
    assert h2.atoms[1].px == 0.0 * u.default.momentum
def combine(self, *others):
    """ Create a new molecule from a group of other AtomContainers

    Notes:
        - Chain IDs and sequence numbers are automatically assigned if they are missing
        - Chains will be renamed to prevent chain ID clashes
        - Residue resnames are not changed.

    Args:
        *others (AtomContainer or AtomList or List[moldesign.Atom]):

    Returns:
        mdt.Molecule: a new Molecule that's the union of this structure with all
           others. Chains will be renamed as necessary to avoid clashes.
    """
    new_atoms = []
    names = []
    charge = 0

    # ordered pool of chain names that have not been handed out yet
    free_names = collections.OrderedDict((letter, None)
                                         for letter in string.ascii_uppercase)
    taken_names = set()
    seen_chains = set()

    for obj in itertools.chain([self], others):
        # work on copies, so the source objects' chains are not renamed
        objatoms = mdt.helpers.get_all_atoms(obj).copy()

        for atom in objatoms:
            chain = atom.chain
            if chain in seen_chains:
                continue
            seen_chains.add(chain)

            # chains without an id, or with an id that is already used, get the next free one
            if chain.pdbindex is None or chain.pdbindex in taken_names:
                chain.pdbindex = chain.name = next(iter(free_names))
            free_names.pop(chain.pdbindex, None)
            taken_names.add(chain.pdbindex)

        new_atoms.extend(objatoms)
        charge += getattr(obj, 'charge', 0 * u.q_e)

        # human-readable label for this piece, used in the description below
        if hasattr(obj, 'name'):
            label = obj.name
        elif objatoms[0].molecule is not None:
            label = '%d atoms from %s' % (len(objatoms), objatoms[0].molecule.name)
        else:
            label = 'list of %d unowned atoms' % len(objatoms)
        names.append(label)

    num_added = len(new_atoms) - self.num_atoms
    description = 'Union of %s' % ', '.join(names)
    return mdt.Molecule(new_atoms,
                        copy_atoms=True,
                        charge=charge,
                        name='%s extended with %d atoms' % (self.name, num_added),
                        metadata=utils.DotDict(description=description))
def __init__(self, mol, unit_system=None, first_frame=False, name=None):
    """Create an empty trajectory bound to ``mol``.

    Args:
        mol: the molecule this trajectory refers to; its ``atoms`` and ``dynamic_dof``
            are read here
        unit_system: unit system to use; ``mdt.units.default`` is substituted via
            ``utils.if_not_none`` when this is ``None``
        first_frame (bool): if True, ``self.new_frame()`` is called once at the end
        name (str): trajectory name; ``'untitled'`` is substituted when this is ``None``
    """
    # NOTE(review): presumably a flag consulted by attribute hooks elsewhere in the
    # class - confirm before reordering any of these assignments
    self._init = True
    self.info = "Trajectory"
    self.frames = []
    self.mol = mol
    self.unit_system = utils.if_not_none(unit_system, mdt.units.default)
    self.properties = utils.DotDict()
    # scratch molecule built from copies of the atoms (``copy_atoms=True``)
    self._tempmol = mdt.Molecule(self.mol.atoms, copy_atoms=True)
    self._tempmol.dynamic_dof = self.mol.dynamic_dof
    self._viz = None
    self._atoms = None  # not populated here
    self.name = utils.if_not_none(name, 'untitled')
    if first_frame:
        self.new_frame()
def test_copying_doesnt_corrupt_original_h2_harmonic(h2_harmonic):
    """Copying a molecule must leave the original's model, integrator and topology alone."""
    mol = h2_harmonic

    # snapshot of the original before copying
    integrator_before = mol.integrator
    model_before = mol.energy_model
    residue_before = mol.residues[0]
    chain_before = list(mol.chains)[0]

    duplicate = mdt.Molecule(mol)  # only the act of copying matters here

    assert integrator_before is mol.integrator
    assert model_before is mol.energy_model
    assert len(mol.chains) == 1
    assert len(mol.residues) == 1
    assert residue_before == mol.residues[0]
    assert chain_before == list(mol.chains)[0]
def _build_qm_system(self):
    """Set up the QM subsystem.

    Builds ``self.qmmol`` from ``self.mol``, cross-links each original atom with its QM
    counterpart (``subatom.molecule_atom`` / ``atom.props.qm_child``), passes the atoms
    flagged as subsystem 'mm' to the QM model as point charges, and records the
    combined list of QM and MM sub-atoms in ``self._subatoms``.

    Note:
        Reads ``self.mmmol`` and ``atom.props.mm_child``, so the MM subsystem must have
        been built before this is called.
    """
    # Set up the QM system
    self.qmmol = mdt.Molecule(self.mol)
    for atom, subatom in zip(self.mol.atoms, self.qmmol.atoms):
        subatom.molecule_atom = atom
        atom.props.qm_child = subatom

    # Make the MM atoms point charges
    # NOTE(review): the subsystem flag is read from ``props`` (the original read
    # ``params`` here, unlike every other per-atom access in this code) - confirm.
    point_charges = {atom: atom.props.mm_child.ff.charge
                     for atom in self.qmmol.atoms
                     if atom.molecule_atom.props.subsystem == 'mm'}
    self.qm_model.params.point_charges = point_charges
    self.qmmol.set_energy_model(self.qm_model)

    # the MM molecule is stored as ``self.mmmol`` (the original misspelled it ``mmol``)
    self._subatoms = self.qmmol.atoms + self.mmmol.atoms
def _setup_qm_subsystem(self):
    """Build the QM molecule as a full copy of ``self.mol`` carrying its forcefield.

    Returns:
        the new QM molecule, with the QM energy model set and the QM atom indices
        passed to that model's parameters

    Raises:
        ValueError: if any link atoms would be required
    """
    qmmol = mdt.Molecule(self.mol)
    self.mol.ff.copy_to(qmmol)

    self.qm_link_atoms = mdt.helpers.qmmm.create_link_atoms(self.mol, self.qm_atoms)
    if self.qm_link_atoms:
        model_name = self.__class__.__name__
        raise ValueError('The %s model does not support link atoms' % model_name)

    qmmol.set_energy_model(self.params.qm_model)
    qm_params = qmmol.energy_model.params
    qm_params.qm_atom_indices = self.params.qm_atom_indices
    return qmmol
def test_numeric_residue_name_1PYN(request, mol):
    """ The ligand in this residue is named "941", which causes a little trickiness
    """
    import parmed

    # ``mol`` arrives as a fixture name and is resolved to the fixture value here
    mol = request.getfixturevalue(mol)
    ligand = mdt.Molecule(mol.residues[283])
    params = mdt.create_ff_parameters(ligand, charges='gasteiger')

    libfile = params._file_list['mol.lib']
    libfile.put('/tmp/tmp.lib')

    contents = parmed.load_file('/tmp/tmp.lib')
    assert len(contents) == 1
    first_key = list(contents.keys())[0]
    assert first_key == '941'
def test_numeric_residue_name_1PYN():
    """ The ligand in this residue is named "941", which causes a little trickiness
    """
    import parmed

    mol = mdt.read(get_data_path('1pyn.pdb'))
    ligand = mdt.Molecule(mol.residues[283])
    params = mdt.parameterize(ligand, charges='gasteiger')
    params.lib.put('/tmp/tmp.lib')

    contents = parmed.load_file('/tmp/tmp.lib')
    assert len(contents) == 1
    # ``keys()`` is a non-subscriptable view on Python 3; materialize it before indexing
    assert list(contents.keys())[0] == '941'
def finish_job(job):
    """Turn the job's 'helix.pdb' output into a named, bonded two-chain molecule.

    If the file was read as a single chain, its second half of residues is moved into
    a new chain 'B' (the first chain is renamed 'A') and the molecule is rebuilt.

    Returns:
        the molecule, with biopolymer bonds assigned and its name set
    """
    pdbfile = job.get_output('helix.pdb').open()
    mol = mdt.fileio.read_pdb(pdbfile, assign_ccd_bonds=False)

    if mol.num_chains == 1:
        assert mol.num_residues % 2 == 0
        halfway = mol.num_residues // 2

        oldchain = mol.chains[0]
        oldchain.name = 'A'
        oldchain.pdbindex = 'A'
        oldchain.pdbname = 'A'

        newchain = mdt.Chain('B')
        for residue in mol.residues[halfway:]:
            residue.chain = newchain

        # rebuild so the molecule picks up the new chain assignment
        mol = mdt.Molecule(mol)

    mdt.helpers.assign_biopolymer_bonds(mol)
    mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
    return mol
def _setup_qm_subsystem(self):
    """ QM subsystem for mechanical embedding is the QM atoms + any link atoms

    Returns:
        the QM molecule, with the QM energy model set; each of its atoms that pairs
        with an entry of ``self.qm_atoms`` gets ``metadata.real_atom`` pointing back at it
    """
    qm_atoms = []
    for iatom in self.params.qm_atom_indices:
        qm_atoms.append(self.mol.atoms[iatom])

    self.qm_link_atoms = mdt.helpers.qmmm.create_link_atoms(self.mol, qm_atoms)

    subsystem_name = '%s QM subsystem' % self.mol.name
    qmmol = mdt.Molecule(qm_atoms + self.qm_link_atoms, name=subsystem_name)

    # zip stops at the shorter sequence, so trailing link atoms get no back-reference
    for real_atom, qm_atom in zip(self.qm_atoms, qmmol.atoms):
        qm_atom.metadata.real_atom = real_atom

    qmmol.set_energy_model(self.params.qm_model)
    return qmmol
def test_mechanical_embedding_wfn(h2_h2_mechanical_embedding_rhf):
    """The embedded QM result must match a standalone RHF/sto-3g run on residue 0."""
    mol = h2_h2_mechanical_embedding_rhf
    mol.calculate()
    qmprops = mol.properties.qmprops
    mmprops = mol.properties.mmprops

    # reference: the first residue on its own, same QM method
    reference = mdt.Molecule(mol.residues[0])
    reference.set_energy_model(mdt.models.RHF, basis='sto-3g')
    reference.calculate()

    energy_gap = abs(reference.potential_energy - qmprops.potential_energy)
    assert energy_gap < 1e-8 * u.hartree
    helpers.assert_almost_equal(reference.wfn.fock_ao, qmprops.wfn.fock_ao)

    # total energy is the sum of the two subsystem energies
    assert qmprops.potential_energy + mmprops.potential_energy == mol.potential_energy
def _prep_for_tleap(mol):
    """ Returns a new molecule, built from ``mol.atoms``, prepared for input to tleap

    Makes the following modifications:
        1. Reassigns all residue IDs (``pdbindex`` becomes ``index + 1``)
        2. Assigns tleap-appropriate cysteine resnames ('CYM' for a sulfur with formal
           charge -1, 'CYX' for a disulfide-bonded sulfur)

    Args:
        mol: molecule to prepare; only ``mol.atoms`` is read here

    Returns:
        the prepared molecule

    Raises:
        ValueError: if a cysteine sulfur has an unexpected formal charge or bond count,
            or is bonded to anything in another residue other than an 'SG' atom of a
            'CYS'/'CYX' residue
    """
    change = False  # NOTE(review): set below but never read in this function
    clean = mdt.Molecule(mol.atoms)
    for residue in clean.residues:
        residue.pdbindex = residue.index + 1

        if residue.resname == 'CYS':  # deal with cysteine states
            if 'SG' not in residue.atoms or 'HG' in residue.atoms:
                # also taken when the thiol hydrogen 'HG' is present
                continue  # sulfur's missing, we'll let tleap create it
            else:
                sulfur = residue.atoms['SG']

            # a sulfur with formal charge -1 gets the 'CYM' residue name
            if sulfur.formal_charge == -1 * u.q_e:
                residue.resname = 'CYM'
                change = True
                continue

            # check for a reasonable hybridization state
            if sulfur.formal_charge != 0 or sulfur.num_bonds not in (1, 2):
                raise ValueError("Unknown sulfur hybridization state for %s"
                                 % sulfur)

            # check for a disulfide bond
            for otheratom in sulfur.bonded_atoms:
                # only bonds leaving this residue are of interest
                if otheratom.residue is not residue:
                    if otheratom.name != 'SG' or otheratom.residue.resname not in ('CYS', 'CYX'):
                        raise ValueError('Unknown bond from cysteine sulfur (%s)' % sulfur)

                    # if we're here, this is a cystine with a disulfide bond
                    print('INFO: disulfide bond detected. Renaming %s from CYS to CYX' % residue)
                    sulfur.residue.resname = 'CYX'

    # called once, after all residues have been renumbered / renamed
    clean._rebuild_from_atoms()

    return clean
def parmed_to_mdt(pmdmol):
    """ Convert parmed Structure to MDT Structure

    Chains and residues are created on first encounter while walking the parmed atoms in
    order; bonds are then copied over with their order cast to ``int``.

    Args:
        pmdmol (parmed.Structure): parmed structure to convert

    Returns:
        mdt.Molecule: converted molecule
    """
    atoms = collections.OrderedDict()  # parmed atom -> mdt atom, in input order
    residues = {}                      # parmed residue -> mdt residue
    chains = {}                        # parmed chain id -> mdt chain

    masses = [pa.mass for pa in pmdmol.atoms] * u.dalton
    positions = [[pa.xx, pa.xy, pa.xz] for pa in pmdmol.atoms] * u.angstrom

    for iatom, patm in enumerate(pmdmol.atoms):
        pres = patm.residue

        # chain: create on first encounter
        if pres.chain not in chains:
            chains[pres.chain] = mdt.Chain(pdbname=pres.chain)
        chain = chains[pres.chain]

        # residue: create on first encounter and attach to its chain
        if pres not in residues:
            residues[pres] = mdt.Residue(resname=pres.name,
                                         pdbindex=pres.number)
            residues[pres].chain = chain
            chain.add(residues[pres])
        residue = residues[pres]

        # atom
        atom = mdt.Atom(name=patm.name,
                        atnum=patm.atomic_number,
                        pdbindex=patm.number,
                        mass=masses[iatom])
        atom.position = positions[iatom]
        atom.residue = residue
        residue.add(atom)

        assert patm not in atoms
        atoms[patm] = atom

    # bonds
    for pbnd in pmdmol.bonds:
        first, second = atoms[pbnd.atom1], atoms[pbnd.atom2]
        first.bond_to(second, int(pbnd.order))

    return mdt.Molecule(list(atoms.values()),
                        metadata=_get_pdb_metadata(pmdmol))