def pdb4all2parmed(structure: 'PDB') -> 'Structure': structure.guess_elements() struc_pmd = Structure() for a in structure.pdb: atom = Atom(atomic_number=Ptable[a['element']]['N'], name=a['name'], number=a['serial']) struc_pmd.add_atom(atom=atom, resname=a['resName'], resnum=a['resSeq'], chain=a['chainID'], segid=a['segment']) struc_pmd.coordinates = structure.xyz struc_pmd.assign_bonds() return struc_pmd
def parse(filename, skip_bonds=False): """ Parses a Gromacs GRO file Parameters ---------- filename : str or file-like Name of the file or the GRO file object skip_bonds : bool, optional If True, skip trying to assign bonds. This can save substantial time when parsing large files with non-standard residue names. However, no bonds are assigned. This is OK if, for instance, the GRO file is being parsed simply for its coordinates. This will also reduce the accuracy of assigned atomic numbers for typical ions. Default is False. Returns ------- struct : :class:`Structure` The Structure instance instantiated with *just* residues and atoms populated (with coordinates) """ struct = Structure() if isinstance(filename, string_types): fileobj = genopen(filename, 'r') own_handle = True else: fileobj = filename own_handle = False try: # Ignore the title line fileobj.readline() try: natom = int(fileobj.readline().strip()) except ValueError: raise GromacsError('Could not parse %s as GRO file' % filename) line_parser = _AtomLineParser() for i, line in enumerate(fileobj): if i == natom: break try: atom, resname, resnum = line_parser.read(line) except (ValueError, IndexError): raise GromacsError('Could not parse the atom record of ' 'GRO file %s' % filename) struct.add_atom(atom, resname, resnum) else: # If no box exists, the break did not hit, so line still # contains the last atom (which cannot be interpreted as a box). # This wipes out line (IFF fileobj reached the line) line = fileobj.readline() if i+1 != natom: raise GromacsError('Truncated GRO file. Found %d of %d ' 'atoms' % (i+1, natom)) # Get the box from the last line if it's present if line.strip(): try: box = [float(x) for x in line.split()] except ValueError: raise GromacsError('Could not understand box line of GRO ' 'file %s' % filename) if len(box) == 3: struct.box = [box[0]*10, box[1]*10, box[2]*10, 90.0, 90.0, 90.0] elif len(box) == 9: # Assume we have vectors leng, ang = box_vectors_to_lengths_and_angles( [box[0], box[3], box[4]]*u.nanometers, [box[5], box[1], box[6]]*u.nanometers, [box[7], box[8], box[2]]*u.nanometers) a, b, c = leng.value_in_unit(u.angstroms) alpha, beta, gamma = ang.value_in_unit(u.degrees) struct.box = [a, b, c, alpha, beta, gamma] finally: if own_handle: fileobj.close() # Assign bonds (and improved element guesses) if not skip_bonds: struct.assign_bonds() return struct
def parse(filename, skip_bonds=True): """ Read a PQR file and return a populated `Structure` class Parameters ---------- filename : str or file-like Name of the PQR file to read, or a file-like object that can iterate over the lines of a PQR. Compressed file names can be specified and are determined by file-name extension (e.g., file.pqr.gz, file.pqr.bz2) skip_bonds : bool, optional If True, skip trying to assign bonds. This can save substantial time when parsing large files with non-standard residue names. However, no bonds are assigned. This is OK if, for instance, the PQR file is being parsed simply for its coordinates. Default is False. Returns ------- structure : :class:`Structure` The Structure object initialized with all of the information from the PDB file. No bonds or other topological features are added by default. """ if isinstance(filename, string_types): own_handle = True fileobj = genopen(filename, 'r') else: own_handle = False fileobj = filename struct = Structure() # Add metadata fields modelno = 1 # For PDB files with multiple MODELs atomno = 0 coordinates = [] all_coordinates = [] # Support hexadecimal numbering like that printed by VMD try: for line in fileobj: words = line.split() if words[0] in ('ATOM', 'HETATM'): atomno += 1 if len(words) == 10: _, num, nam, res, resn, x, y, z, chg, rad = words chn = '' elif len(words) >= 11: _, num, nam, res, chn, resn, x, y, z, chg, rad = ( words[i] for i in range(11)) # If the radius is not a float (but rather a letter, # like the element or something), then the chain might # be missing. In this case, shift all tokens "back" one # and empty the chn string try: float(rad) except ValueError: resn, x, y, z, chg, rad = chn, resn, x, y, z, chg else: raise ValueError('Illegal PQR record format: expected ' '10 or 11 tokens on the atom line') x, y, z = float(x), float(y), float(z) chg, rad = float(chg), float(rad) resn, num = int(resn), int(num) elem = element_by_name(nam) # Yuck atomic_number = AtomicNum[elem] mass = Mass[elem] if nam in ('EP', 'LP'): # lone pair atom = ExtraPoint(atomic_number=atomic_number, name=nam, charge=chg, mass=mass, number=num, solvent_radius=rad) else: atom = Atom(atomic_number=atomic_number, name=nam, charge=chg, mass=mass, number=num, solvent_radius=rad) atom.xx, atom.xy, atom.xz = float(x), float(y), float(z) if modelno == 1: struct.add_atom(atom, res, resn, chn) else: try: orig_atom = struct.atoms[atomno-1] except IndexError: raise PDBError('Extra atom in MODEL %d' % modelno) if (orig_atom.residue.name != res.strip() or orig_atom.name != nam.strip()): raise PDBError('Atom %d differs in MODEL %d [%s %s ' 'vs. %s %s]' % (atomno, modelno, orig_atom.residue.name, orig_atom.name, res, nam)) coordinates.extend([atom.xx, atom.xy, atom.xz]) elif words[0] == 'ENDMDL': # End the current model if len(struct.atoms) == 0: raise PDBError('MODEL ended before any atoms read in') modelno += 1 if len(struct.atoms)*3 != len(coordinates): raise PDBError( 'Inconsistent atom numbers in some PDB models') all_coordinates.append(coordinates) atomno = 0 coordinates = [] elif words[0] == 'MODEL': if modelno == 1 and len(struct.atoms) == 0: continue if len(coordinates) > 0: if len(struct.atoms)*3 != len(coordinates): raise PDBError('Inconsistent atom numbers in ' 'some PDB models') warnings.warn('MODEL not explicitly ended', PDBWarning) all_coordinates.append(coordinates) coordinates = [] modelno += 1 atomno = 0 elif words[0] == 'CRYST1': a, b, c = (float(w) for w in words[1:4]) try: A, B, C = (float(w) for w in words[4:7]) except ValueError: A = B = C = 90.0 struct.box = [a, b, c, A, B, C] finally: if own_handle: fileobj.close() struct.unchange() if not skip_bonds: struct.assign_bonds() if coordinates: if len(coordinates) != 3*len(struct.atoms): raise PDBError('bad number of atoms in some PQR models') all_coordinates.append(coordinates) struct._coordinates = np.array(all_coordinates).reshape( (-1, len(struct.atoms), 3)) return struct