Пример #1
0
def _guess_atomic_number(name, residue=None):
    """ Guesses the atomic number of an atom from its name

    All non-alphabetic characters are removed from ``name`` first. If no
    residue is given, or the residue contains exactly one atom (almost always
    an ion), the first two letters are tried as an element symbol. In every
    other case, or when that symbol is unknown, the element is looked up with
    ``element_by_name``.
    """
    letters = ''.join(ch for ch in name if ch.isalpha())
    lone_atom = residue is None or len(residue.atoms) == 1
    if lone_atom and len(letters) > 1:
        # Two-letter symbol such as "Na" or "Cl"
        symbol = letters[0].upper() + letters[1].lower()
        try:
            return AtomicNum[symbol]
        except KeyError:
            pass  # not a known symbol; fall through to the generic guess
    return AtomicNum[element_by_name(letters)]
Пример #2
0
def _guess_atomic_number(name, residue=None):
    """ Returns the atomic number guessed from an atom name

    Only the alphabetic characters of ``name`` are considered. For an atom
    with no residue, or one that is alone in its residue (typically an ion),
    a capitalized two-letter symbol built from the first two letters is tried
    first; ``element_by_name`` is the fallback for everything else.
    """
    alpha_only = ''.join(char for char in name if char.isalpha())
    treat_as_ion = residue is None or len(residue.atoms) == 1
    if treat_as_ion and len(alpha_only) > 1:
        candidate = alpha_only[0].upper() + alpha_only[1].lower()
        try:
            number = AtomicNum[candidate]
        except KeyError:
            # Unknown two-letter symbol -- use the generic lookup below
            pass
        else:
            return number
    return AtomicNum[element_by_name(alpha_only)]
Пример #3
0
 def __init__(self, fname, seq=None):
     """ Parses a Tinker XYZ file and populates this structure

     Parameters
     ----------
     fname : str or file-like
         Name of the XYZ file, or an already-open file object. A file that
         is opened here is always closed again, even when parsing fails; a
         file object supplied by the caller is left open.
     seq : str, optional
         File handed to ``load_file``; its residues are assigned to the
         atoms read from the XYZ file. Its atom count must equal the count
         given on the first line of the XYZ file.

     Raises
     ------
     TinkerError
         If the first line does not start with an integer atom count
     ValueError
         If ``seq`` holds a different number of atoms than the XYZ file
     """
     super(XyzFile, self).__init__()
     if isinstance(fname, string_types):
         fxyz = genopen(fname, 'r')
         own_handle_xyz = True
     else:
         fxyz = fname
         own_handle_xyz = False
     try:
         if seq is not None:
             seqstruct = load_file(seq)
         # Now parse the file
         try:
             natom = int(fxyz.readline().split()[0])
         except (ValueError, IndexError):
             raise TinkerError('Bad XYZ file format; first line')
         if seq is not None and natom != len(seqstruct.atoms):
             raise ValueError(
                 'Sequence file %s # of atoms does not match the # '
                 'of atoms in the XYZ file' % seq)
         words = fxyz.readline().split()
         # A 6-field line that is not an atom record is taken as the box
         if len(words) == 6 and not XyzFile._check_atom_record(words):
             self.box = [float(w) for w in words]
             words = fxyz.readline().split()
         # Record layout used below: words[1] is the name, words[2:5] the
         # coordinates, words[5] the type, words[6:] the bonded atom numbers
         atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                     name=words[1],
                     type=words[5])
         atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
         residue = Residue('SYS')
         residue.number = 1
         residue._idx = 0
         if seq is not None:
             residue = seqstruct.residues[0]
         self.add_atom(atom, residue.name, residue.number, residue.chain,
                       residue.insertion_code, residue.segid)
         bond_ids = [[int(w) for w in words[6:]]]
         for i, line in enumerate(fxyz):
             words = line.split()
             atom = Atom(
                 atomic_number=AtomicNum[element_by_name(words[1])],
                 name=words[1],
                 type=words[5])
             atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
             if seq is not None:
                 # The first atom was consumed before this loop, hence i + 1
                 residue = seqstruct.atoms[i + 1].residue
             self.add_atom(atom, residue.name, residue.number,
                           residue.chain, residue.insertion_code,
                           residue.segid)
             bond_ids.append([int(w) for w in words[6:]])
     finally:
         # Do not leak a handle we opened ourselves if parsing blew up
         if own_handle_xyz:
             fxyz.close()
     # All of the bonds are stored now -- go ahead and make them now
     for atom, bonds in zip(self.atoms, bond_ids):
         i = atom.idx + 1
         for idx in bonds:
             # Each bond is listed by both partners; add it only once
             if idx > i:
                 self.bonds.append(Bond(atom, self.atoms[idx - 1]))
Пример #4
0
 def __init__(self, fname, seq=None):
     """ Reads a Tinker XYZ file into this structure

     Parameters
     ----------
     fname : str or file-like
         Path of the XYZ file or an open file object. Files opened by this
         method are closed before it returns or raises; caller-supplied
         file objects are never closed here.
     seq : str, optional
         File loaded with ``load_file`` that supplies the residue of every
         atom. It must contain as many atoms as the XYZ header declares.

     Raises
     ------
     TinkerError
         If the first line does not begin with an integer atom count
     ValueError
         If the atom counts of ``seq`` and the XYZ file disagree
     """
     super(XyzFile, self).__init__()
     if isinstance(fname, string_types):
         fxyz = genopen(fname, 'r')
         own_handle_xyz = True
     else:
         fxyz = fname
         own_handle_xyz = False
     try:
         if seq is not None:
             seqstruct = load_file(seq)
         # Now parse the file
         try:
             natom = int(fxyz.readline().split()[0])
         except (ValueError, IndexError):
             raise TinkerError('Bad XYZ file format; first line')
         if seq is not None and natom != len(seqstruct.atoms):
             raise ValueError('Sequence file %s # of atoms does not match '
                              'the # of atoms in the XYZ file' % seq)
         words = fxyz.readline().split()
         # Six fields that do not form an atom record are read as the box
         if len(words) == 6 and not XyzFile._check_atom_record(words):
             self.box = [float(w) for w in words]
             words = fxyz.readline().split()
         # Fields used: [1] name, [2:5] coordinates, [5] type, [6:] bonds
         atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                     name=words[1], type=words[5])
         atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
         residue = Residue('SYS')
         residue.number = 1
         residue._idx = 0
         if seq is not None:
             residue = seqstruct.residues[0]
         self.add_atom(atom, residue.name, residue.number, residue.chain,
                       residue.insertion_code, residue.segid)
         bond_ids = [[int(w) for w in words[6:]]]
         for i, line in enumerate(fxyz):
             words = line.split()
             atom = Atom(
                 atomic_number=AtomicNum[element_by_name(words[1])],
                 name=words[1], type=words[5])
             atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
             if seq is not None:
                 # Offset by one: atom 0 was handled ahead of the loop
                 residue = seqstruct.atoms[i+1].residue
             self.add_atom(atom, residue.name, residue.number,
                           residue.chain, residue.insertion_code,
                           residue.segid)
             bond_ids.append([int(w) for w in words[6:]])
     finally:
         # Close our own handle on both the success and the error path
         if own_handle_xyz:
             fxyz.close()
     # All of the bonds are stored now -- go ahead and make them now
     for atom, bonds in zip(self.atoms, bond_ids):
         i = atom.idx + 1
         for idx in bonds:
             # Only the lower-numbered partner creates the bond
             if idx > i:
                 self.bonds.append(Bond(atom, self.atoms[idx-1]))
Пример #5
0
    def to_parmed(self, title='', **kwargs):
        """Build a ParmEd Structure from this Compound.

        Every particle becomes a ``pmd.Atom`` in a single residue named
        ``'RES'`` with number 1, and every bond becomes a ``pmd.Bond``. When
        a particle name is not a key of ``AtomicNum`` the element is guessed
        with ``element_by_name``; a warning is issued once per distinct
        name (compared with digits removed).

        ``title`` falls back to ``self.name`` when empty. Extra keyword
        arguments are accepted but not used here.
        """
        structure = pmd.Structure()
        structure.title = title or self.name
        pmd_atoms = {}  # particle -> pmd.Atom, consulted when adding bonds
        already_warned = set()
        for particle in self.particles():
            digitless = ''.join(
                ch for ch in particle.name if not ch.isdigit())
            atomic_number = None
            try:
                atomic_number = AtomicNum[particle.name]
            except KeyError:
                element = element_by_name(particle.name)
                if digitless not in already_warned:
                    message = 'Guessing that "{}" is element: "{}"'.format(
                        particle, element)
                    warn(message)
                    already_warned.add(digitless)
            else:
                element = particle.name

            if not atomic_number:
                atomic_number = AtomicNum[element]
            pmd_atom = pmd.Atom(atomic_number=atomic_number,
                                name=particle.name,
                                mass=Mass[element])
            # Positions are scaled by 10; the result is labelled Angstroms
            pmd_atom.xx, pmd_atom.xy, pmd_atom.xz = particle.pos * 10
            structure.add_atom(pmd_atom, resname='RES', resnum=1)
            pmd_atoms[particle] = pmd_atom

        for first, second in self.bonds():
            structure.bonds.append(
                pmd.Bond(pmd_atoms[first], pmd_atoms[second]))

        bounding = self.boundingbox
        box_vector = np.empty(6)
        box_vector[3:] = 90.0  # all three box angles
        for axis, period in enumerate(self.periodicity):
            if period:
                length = period * 10
            else:
                # Non-periodic axis: bounding-box length plus a margin of 5
                length = bounding.lengths[axis] * 10 + 5
            box_vector[axis] = length
        structure.box = box_vector
        return structure
Пример #6
0
    def read(self, line):
        """ Parses one atom record

        The field width of the coordinate columns is measured on the first
        record seen (distance between the first two decimal points found
        from column 20 on) and cached in ``self._digits`` for later calls.
        Coordinates, and velocities when that column block is not blank,
        are multiplied by 10 before being stored on the atom.

        Parameters
        ----------
        line : str
            A line with an atom record from a GRO file

        Returns
        -------
        atom, resname, resnum : Atom, str, int
            The Atom instance, residue name, and residue number containing the
            atom
        """
        resnum = int(line[:5])
        resname = line[5:10].strip()
        atomname = line[10:15].strip()
        element = element_by_name(atomname)
        atomic_number = AtomicNum[element]
        mass = Mass[element]
        serial = int(line[15:20])
        if atomic_number != 0:
            atom = Atom(atomic_number=atomic_number, name=atomname,
                        number=serial, mass=mass)
        else:
            # Atomic number 0 is represented by an ExtraPoint
            atom = ExtraPoint(name=atomname, number=serial)
        if self._digits is None:
            self._pdeci = line.index('.', 20)
            self._ndeci = line.index('.', self._pdeci+1)
            self._digits = self._ndeci - self._pdeci
        width = self._digits

        def scaled_field(k):
            # k-th fixed-width numeric column after the first 20 characters
            start = 20 + k * width
            return float(line[start:start + width]) * 10

        atom.xx, atom.xy, atom.xz = (
            scaled_field(0), scaled_field(1), scaled_field(2))
        # Columns 3-5 hold the velocities; they may be absent entirely
        if line[20 + 3 * width:20 + 4 * width].strip():
            atom.vx, atom.vy, atom.vz = (
                scaled_field(3), scaled_field(4), scaled_field(5))
        return atom, resname, resnum
Пример #7
0
    def to_parmed(self, title='', **kwargs):
        """Convert this Compound into a ParmEd Structure.

        Each particle is added as a ``pmd.Atom`` to residue ``'RES'`` number
        1, and each bond as a ``pmd.Bond``. A particle whose name is not a
        key of ``AtomicNum`` has its element guessed by ``element_by_name``,
        and a warning is emitted for every such particle.

        An empty ``title`` is replaced by ``self.name``. Additional keyword
        arguments are accepted and ignored by this method.
        """
        structure = pmd.Structure()
        structure.title = title or self.name
        converted = {}  # particle -> pmd.Atom, needed for the bonds below
        for particle in self.particles():
            atomic_number = None
            try:
                atomic_number = AtomicNum[particle.name]
            except KeyError:
                element = element_by_name(particle.name)
                warn('Guessing that {} is element: {}'.format(
                    particle, element))
            else:
                element = particle.name

            if not atomic_number:
                atomic_number = AtomicNum[element]
            pmd_atom = pmd.Atom(
                atomic_number=atomic_number,
                name=particle.name,
                mass=Mass[element],
            )
            # Positions are scaled by 10; the result is labelled Angstroms
            pmd_atom.xx, pmd_atom.xy, pmd_atom.xz = particle.pos * 10
            structure.add_atom(pmd_atom, resname='RES', resnum=1)
            converted[particle] = pmd_atom

        for left, right in self.bonds():
            structure.bonds.append(
                pmd.Bond(converted[left], converted[right]))

        bounding = self.boundingbox
        box_vector = np.empty(6)
        box_vector[3:] = 90.0  # all three box angles
        for axis, period in enumerate(self.periodicity):
            if period:
                box_vector[axis] = period * 10
                continue
            # Non-periodic axis: bounding-box length plus a margin of 5
            box_vector[axis] = bounding.lengths[axis] * 10 + 5
        structure.box = box_vector
        return structure
Пример #8
0
    def parse(filename):
        """ Parses a Gromacs GRO file

        The first line (title) is ignored and the second must hold the atom
        count. Every atom record is read with fixed-width columns: residue
        number, residue name, atom name and atom number in the first 20
        characters, followed by equally wide numeric fields for the three
        coordinates and, optionally, the three velocities. The field width
        is measured on the first atom record as the distance between its
        first two decimal points. Coordinates and velocities are multiplied
        by 10 and box lengths are converted from nanometers to Angstroms.

        The line following the atom records, if present and not blank, is
        read as the box: either 3 lengths or 9 box-vector components.

        Parameters
        ----------
        filename : str or file-like
            Name of the file or the GRO file object

        Returns
        -------
        struct : :class:`Structure`
            The Structure instance instantiated with *just* residues and atoms
            populated (with coordinates)

        Raises
        ------
        GromacsError
            If the atom count, an atom record or the box line cannot be
            parsed, or if the file holds fewer atom records than declared
        """
        struct = Structure()
        if isinstance(filename, string_types):
            fileobj = genopen(filename, 'r')
            own_handle = True
        else:
            fileobj = filename
            own_handle = False
        try:
            # Ignore the title line
            fileobj.readline()
            try:
                natom = int(fileobj.readline().strip())
            except ValueError:
                raise GromacsError('Could not parse %s as GRO file' % filename)
            digits = None
            # Keep the box line in its own variable: relying on the loop
            # variable fails when no lines follow the atom count and would
            # mistake the last atom record for the box when it is missing
            box_line = None
            nread = 0
            for i, line in enumerate(fileobj):
                if i == natom:
                    box_line = line
                    break
                try:
                    resnum = int(line[:5])
                    resname = line[5:10].strip()
                    atomname = line[10:15].strip()
                    elem = element_by_name(atomname)
                    atomic_number = AtomicNum[elem]
                    mass = Mass[elem]
                    atnum = int(line[15:20])
                    if atomic_number == 0:
                        atom = ExtraPoint(name=atomname, number=atnum)
                    else:
                        atom = Atom(atomic_number=atomic_number, name=atomname,
                                    number=atnum, mass=mass)
                    if digits is None:
                        pdeci = line.index('.', 20)
                        ndeci = line.index('.', pdeci+1)
                        digits = ndeci - pdeci
                    atom.xx, atom.xy, atom.xz = (
                            float(line[20+j*digits:20+(j+1)*digits])*10
                                for j in range(3)
                    )
                    # Velocities use the next three fields of the same
                    # width, directly after the coordinates
                    wbeg = 20 + digits * 3
                    wend = wbeg + digits
                    if line[wbeg:wend].strip():
                        atom.vx, atom.vy, atom.vz = (
                                float(line[wbeg+j*digits:wend+j*digits])*10
                                for j in range(3)
                        )
                except (ValueError, IndexError):
                    raise GromacsError('Could not parse the atom record of '
                                       'GRO file %s' % filename)
                struct.add_atom(atom, resname, resnum)
                nread += 1
            if nread < natom:
                raise GromacsError('GRO file %s ended after %d of %d atom '
                                   'records' % (filename, nread, natom))
            # Get the box from the last line if it's present
            if box_line is not None and box_line.strip():
                try:
                    box = [float(x) for x in box_line.split()]
                except ValueError:
                    raise GromacsError('Could not understand box line of GRO '
                                       'file %s' % filename)
                if len(box) == 3:
                    struct.box = [box[0]*10, box[1]*10, box[2]*10,
                                  90.0, 90.0, 90.0]
                elif len(box) == 9:
                    # Assume we have vectors
                    leng, ang = box_vectors_to_lengths_and_angles(
                                [box[0], box[3], box[4]]*u.nanometers,
                                [box[5], box[1], box[6]]*u.nanometers,
                                [box[7], box[8], box[2]]*u.nanometers)
                    a, b, c = leng.value_in_unit(u.angstroms)
                    alpha, beta, gamma = ang.value_in_unit(u.degrees)
                    struct.box = [a, b, c, alpha, beta, gamma]
        finally:
            if own_handle:
                fileobj.close()

        return struct
Пример #9
0
    def parse(filename):
        """ Read a PQR file and return a populated `Structure` class

        Atom records (``ATOM`` / ``HETATM``) are split on whitespace and must
        have either 10 tokens (no chain) or at least 11 (with chain). Blank
        lines are skipped. ``MODEL`` / ``ENDMDL`` records delimit coordinate
        frames: atoms are created from the first model only, later models
        contribute coordinates and are checked against the first. A
        ``CRYST1`` record sets the box.

        Parameters
        ----------
        filename : str or file-like
            Name of the PQR file to read, or a file-like object that can iterate
            over the lines of a PQR. Compressed file names can be specified and
            are determined by file-name extension (e.g., file.pqr.gz,
            file.pqr.bz2)

        Returns
        -------
        structure : :class:`Structure`
            The Structure object initialized with all of the information from
            the PQR file.  No bonds or other topological features are added by
            default.

        Raises
        ------
        ValueError
            If an atom record has fewer than 10 tokens or a numeric token
            cannot be converted
        PDBError
            If the models are inconsistent with each other
        """
        if isinstance(filename, string_types):
            own_handle = True
            fileobj = genopen(filename, "r")
        else:
            own_handle = False
            fileobj = filename

        struct = Structure()
        # Add metadata fields
        modelno = 1  # For PDB files with multiple MODELs
        atomno = 0
        coordinates = []
        all_coordinates = []

        try:
            for line in fileobj:
                words = line.split()
                if not words:
                    # Blank line: nothing to dispatch on
                    continue
                if words[0] in ("ATOM", "HETATM"):
                    atomno += 1
                    if len(words) == 10:
                        _, num, nam, res, resn, x, y, z, chg, rad = words
                        chn = ""
                    elif len(words) >= 11:
                        _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                        # If the radius is not a float (but rather a letter,
                        # like the element or something), then the chain might
                        # be missing. In this case, shift all tokens "back" one
                        # and empty the chn string
                        try:
                            float(rad)
                        except ValueError:
                            resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                            chn = ""
                    else:
                        raise ValueError("Illegal PQR record format: expected " "10 or 11 tokens on the atom line")
                    x, y, z = float(x), float(y), float(z)
                    chg, rad = float(chg), float(rad)
                    # Atom and residue numbers are read as base-10 integers
                    resn, num = int(resn), int(num)
                    elem = element_by_name(nam)  # Yuck
                    atomic_number = AtomicNum[elem]
                    mass = Mass[elem]
                    if nam in ("EP", "LP"):  # lone pair
                        atom = ExtraPoint(
                            atomic_number=atomic_number, name=nam, charge=chg, mass=mass, number=num, solvent_radius=rad
                        )
                    else:
                        atom = Atom(
                            atomic_number=atomic_number, name=nam, charge=chg, mass=mass, number=num, solvent_radius=rad
                        )
                    atom.xx, atom.xy, atom.xz = x, y, z
                    if modelno == 1:
                        struct.add_atom(atom, res, resn, chn)
                    else:
                        # Later models must repeat the atoms of the first one
                        try:
                            orig_atom = struct.atoms[atomno - 1]
                        except IndexError:
                            raise PDBError("Extra atom in MODEL %d" % modelno)
                        if orig_atom.residue.name != res.strip() or orig_atom.name != nam.strip():
                            raise PDBError(
                                "Atom %d differs in MODEL %d [%s %s "
                                "vs. %s %s]" % (atomno, modelno, orig_atom.residue.name, orig_atom.name, res, nam)
                            )
                    coordinates.extend([atom.xx, atom.xy, atom.xz])
                elif words[0] == "ENDMDL":
                    # End the current model
                    if len(struct.atoms) == 0:
                        raise PDBError("MODEL ended before any atoms read in")
                    modelno += 1
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError("Inconsistent atom numbers in some PDB models")
                    all_coordinates.append(coordinates)
                    atomno = 0
                    coordinates = []
                elif words[0] == "MODEL":
                    if modelno == 1 and len(struct.atoms) == 0:
                        continue
                    if len(coordinates) > 0:
                        # Previous model was never closed with ENDMDL
                        if len(struct.atoms) * 3 != len(coordinates):
                            raise PDBError("Inconsistent atom numbers in " "some PDB models")
                        warnings.warn("MODEL not explicitly ended", PDBWarning)
                        all_coordinates.append(coordinates)
                        coordinates = []
                    modelno += 1
                    atomno = 0
                elif words[0] == "CRYST1":
                    a, b, c = (float(w) for w in words[1:4])
                    try:
                        A, B, C = (float(w) for w in words[4:7])
                    except ValueError:
                        # Angles missing or unreadable: use 90 degrees
                        A = B = C = 90.0
                    struct.box = [a, b, c, A, B, C]
        finally:
            if own_handle:
                fileobj.close()

        struct.unchange()
        if coordinates:
            # Trailing model (or the only frame) without an ENDMDL record
            if len(coordinates) != 3 * len(struct.atoms):
                raise PDBError("bad number of atoms in some PQR models")
            all_coordinates.append(coordinates)
        struct._coordinates = np.array(all_coordinates).reshape((-1, len(struct.atoms), 3))
        return struct
Пример #10
0
    def parse(filename):
        """ Read a PQR file and return a populated `Structure` class

        ``ATOM`` / ``HETATM`` records are tokenized on whitespace and need
        either 10 tokens (chain omitted) or 11 or more (chain included).
        Blank lines are ignored. Atoms are created while reading the first
        model; atoms of later ``MODEL`` sections only add coordinate frames
        and are verified against the first model. ``CRYST1`` sets the box.

        Parameters
        ----------
        filename : str or file-like
            Name of the PQR file to read, or a file-like object that can iterate
            over the lines of a PQR. Compressed file names can be specified and
            are determined by file-name extension (e.g., file.pqr.gz,
            file.pqr.bz2)

        Returns
        -------
        structure : :class:`Structure`
            The Structure object initialized with all of the information from
            the PQR file.  No bonds or other topological features are added by
            default.

        Raises
        ------
        ValueError
            If an atom record has fewer than 10 tokens or one of its numeric
            tokens cannot be converted
        PDBError
            If the models disagree with each other
        """
        if isinstance(filename, string_types):
            own_handle = True
            fileobj = genopen(filename, 'r')
        else:
            own_handle = False
            fileobj = filename

        struct = Structure()
        # Add metadata fields
        modelno = 1  # For PDB files with multiple MODELs
        atomno = 0
        coordinates = []
        all_coordinates = []

        try:
            for line in fileobj:
                words = line.split()
                if not words:
                    # Skip blank lines instead of failing on words[0]
                    continue
                if words[0] in ('ATOM', 'HETATM'):
                    atomno += 1
                    if len(words) == 10:
                        _, num, nam, res, resn, x, y, z, chg, rad = words
                        chn = ''
                    elif len(words) >= 11:
                        (_, num, nam, res, chn, resn, x, y, z, chg,
                         rad) = words[:11]
                        # If the radius is not a float (but rather a letter,
                        # like the element or something), then the chain might
                        # be missing. In this case, shift all tokens "back" one
                        # and empty the chn string
                        try:
                            float(rad)
                        except ValueError:
                            resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                            chn = ''
                    else:
                        raise ValueError('Illegal PQR record format: expected '
                                         '10 or 11 tokens on the atom line')
                    x, y, z = float(x), float(y), float(z)
                    chg, rad = float(chg), float(rad)
                    # Atom and residue numbers are read as base-10 integers
                    resn, num = int(resn), int(num)
                    elem = element_by_name(nam)  # Yuck
                    atomic_number = AtomicNum[elem]
                    mass = Mass[elem]
                    if nam in ('EP', 'LP'):  # lone pair
                        atom = ExtraPoint(atomic_number=atomic_number,
                                          name=nam,
                                          charge=chg,
                                          mass=mass,
                                          number=num,
                                          solvent_radius=rad)
                    else:
                        atom = Atom(atomic_number=atomic_number,
                                    name=nam,
                                    charge=chg,
                                    mass=mass,
                                    number=num,
                                    solvent_radius=rad)
                    atom.xx, atom.xy, atom.xz = x, y, z
                    if modelno == 1:
                        struct.add_atom(atom, res, resn, chn)
                    else:
                        # Atoms of later models must match the first model
                        try:
                            orig_atom = struct.atoms[atomno - 1]
                        except IndexError:
                            raise PDBError('Extra atom in MODEL %d' % modelno)
                        if (orig_atom.residue.name != res.strip()
                                or orig_atom.name != nam.strip()):
                            raise PDBError(
                                'Atom %d differs in MODEL %d [%s %s '
                                'vs. %s %s]' %
                                (atomno, modelno, orig_atom.residue.name,
                                 orig_atom.name, res, nam))
                    coordinates.extend([atom.xx, atom.xy, atom.xz])
                elif words[0] == 'ENDMDL':
                    # End the current model
                    if len(struct.atoms) == 0:
                        raise PDBError('MODEL ended before any atoms read in')
                    modelno += 1
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError(
                            'Inconsistent atom numbers in some PDB models')
                    all_coordinates.append(coordinates)
                    atomno = 0
                    coordinates = []
                elif words[0] == 'MODEL':
                    if modelno == 1 and len(struct.atoms) == 0:
                        continue
                    if len(coordinates) > 0:
                        # The previous model was not terminated by ENDMDL
                        if len(struct.atoms) * 3 != len(coordinates):
                            raise PDBError('Inconsistent atom numbers in '
                                           'some PDB models')
                        warnings.warn('MODEL not explicitly ended', PDBWarning)
                        all_coordinates.append(coordinates)
                        coordinates = []
                    modelno += 1
                    atomno = 0
                elif words[0] == 'CRYST1':
                    a, b, c = (float(w) for w in words[1:4])
                    try:
                        A, B, C = (float(w) for w in words[4:7])
                    except ValueError:
                        # Angles missing or unreadable: use 90 degrees
                        A = B = C = 90.0
                    struct.box = [a, b, c, A, B, C]
        finally:
            if own_handle:
                fileobj.close()

        struct.unchange()
        if coordinates:
            # Last (or only) frame that was not closed by ENDMDL
            if len(coordinates) != 3 * len(struct.atoms):
                raise PDBError('bad number of atoms in some PQR models')
            all_coordinates.append(coordinates)
        struct._coordinates = np.array(all_coordinates).reshape(
            (-1, len(struct.atoms), 3))
        return struct