def parse(cls, filename, structure=False):
    """
    Open ``filename`` with ``cls`` and optionally wrap it in a `Structure`

    Parameters
    ----------
    filename : str
        Passed unchanged to ``cls(filename)``
    structure : bool, optional
        If False (default), the object returned by ``cls(filename)`` is
        handed back as-is. If True, a `Structure` carrying that object's box
        and coordinates is built and returned instead.

    Returns
    -------
    obj : ``cls`` instance or :class:`Structure`
        When a Structure is returned it contains ``natom`` placeholder atoms
        (default-constructed `Atom`, residue name 'XXX', residue number 0),
        since only the atom count, box and coordinates are copied over.
    """
    parsed = cls(filename)
    if not structure:
        return parsed

    struct = Structure()
    natom = parsed.natom
    count = 0
    while count < natom:
        # Placeholder atoms: nothing but the atom count is taken from the file
        struct.add_atom(Atom(), resname='XXX', resnum=0)
        count += 1
    struct.box = parsed.box
    # First frame goes through the public attribute, the full set of frames
    # is stored on the private one
    struct.coordinates = parsed.coordinates[0]
    struct._coordinates = parsed.coordinates
    return struct
def parse(cls, filename, structure=False):
    """
    Open ``filename`` with ``cls`` and optionally wrap it in a `Structure`

    Parameters
    ----------
    filename : str
        Passed unchanged to ``cls(filename)``
    structure : bool, optional
        If False (default), the object returned by ``cls(filename)`` is
        handed back as-is. If True, a `Structure` carrying that object's box
        and coordinates is built and returned instead.

    Returns
    -------
    obj : ``cls`` instance or :class:`Structure`
        When a Structure is returned it contains ``natom`` placeholder atoms
        (default-constructed `Atom`, residue name 'XXX', residue number 0),
        since only the atom count, box and coordinates are copied over.
    """
    parsed = cls(filename)
    if not structure:
        return parsed

    struct = Structure()
    natom = parsed.natom
    count = 0
    while count < natom:
        # Placeholder atoms: nothing but the atom count is taken from the file
        struct.add_atom(Atom(), resname='XXX', resnum=0)
        count += 1
    struct.box = parsed.box
    # First frame goes through the public attribute, the full set of frames
    # is stored on the private one
    struct.coordinates = parsed.coordinates[0]
    struct._coordinates = parsed.coordinates
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from
        the PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM record has fewer than 10 whitespace-separated
        tokens, or one of its numeric fields cannot be converted
    PDBError
        If a later MODEL does not list the same atoms (count, residue name
        and atom name) as the first one, or a MODEL ends before any atom was
        read

    Notes
    -----
    Records are split on whitespace rather than by fixed columns. Blank
    lines are skipped. If a file-like object is passed in, it is not closed.
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, "r")
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    modelno = 1  # For PQR files with multiple MODELs
    atomno = 0
    coordinates = []  # flat x, y, z list for the model being read
    all_coordinates = []  # one flat list per completed model

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank line: nothing to dispatch on
                continue
            if words[0] in ("ATOM", "HETATM"):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ""
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ""
                else:
                    raise ValueError("Illegal PQR record format: expected "
                                     "10 or 11 tokens on the atom line")
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ("EP", "LP"):  # lone pair
                    atom = ExtraPoint(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                else:
                    atom = Atom(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                atom.xx, atom.xy, atom.xz = x, y, z
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates; their atoms
                    # are checked against the first model, not added again
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError("Extra atom in MODEL %d" % modelno)
                    if orig_atom.residue.name != res.strip() or orig_atom.name != nam.strip():
                        raise PDBError(
                            "Atom %d differs in MODEL %d [%s %s "
                            "vs. %s %s]" % (atomno, modelno, orig_atom.residue.name,
                                            orig_atom.name, res, nam)
                        )
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == "ENDMDL":
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError("MODEL ended before any atoms read in")
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError("Inconsistent atom numbers in some PDB models")
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == "MODEL":
                if modelno == 1 and len(struct.atoms) == 0:
                    # Opening MODEL record of the first model
                    continue
                if len(coordinates) > 0:
                    # Previous model was never closed with ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError("Inconsistent atom numbers in "
                                       "some PDB models")
                    warnings.warn("MODEL not explicitly ended", PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                modelno += 1
                atomno = 0
            elif words[0] == "CRYST1":
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Angles missing or unparsable: fall back to 90 degrees
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        # Only close handles this function opened itself
        if own_handle:
            fileobj.close()

    struct.unchange()
    if coordinates:
        # Trailing model that was not terminated by ENDMDL
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError("bad number of atoms in some PQR models")
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape((-1, len(struct.atoms), 3))
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from
        the PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM record has fewer than 10 whitespace-separated
        tokens, or one of its numeric fields cannot be converted
    PDBError
        If a later MODEL does not list the same atoms (count, residue name
        and atom name) as the first one, or a MODEL ends before any atom was
        read

    Notes
    -----
    Records are split on whitespace rather than by fixed columns. Blank
    lines are skipped. If a file-like object is passed in, it is not closed.
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, 'r')
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    modelno = 1  # For PQR files with multiple MODELs
    atomno = 0
    coordinates = []  # flat x, y, z list for the model being read
    all_coordinates = []  # one flat list per completed model

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank line: nothing to dispatch on
                continue
            if words[0] in ('ATOM', 'HETATM'):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ''
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ''
                else:
                    raise ValueError('Illegal PQR record format: expected '
                                     '10 or 11 tokens on the atom line')
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ('EP', 'LP'):  # lone pair
                    atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                      charge=chg, mass=mass, number=num,
                                      solvent_radius=rad)
                else:
                    atom = Atom(atomic_number=atomic_number, name=nam,
                                charge=chg, mass=mass, number=num,
                                solvent_radius=rad)
                atom.xx, atom.xy, atom.xz = x, y, z
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates; their atoms
                    # are checked against the first model, not added again
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError('Extra atom in MODEL %d' % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            'Atom %d differs in MODEL %d [%s %s '
                            'vs. %s %s]' % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam))
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == 'ENDMDL':
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError('MODEL ended before any atoms read in')
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError(
                        'Inconsistent atom numbers in some PDB models')
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == 'MODEL':
                if modelno == 1 and len(struct.atoms) == 0:
                    # Opening MODEL record of the first model
                    continue
                if len(coordinates) > 0:
                    # Previous model was never closed with ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError('Inconsistent atom numbers in '
                                       'some PDB models')
                    warnings.warn('MODEL not explicitly ended', PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                modelno += 1
                atomno = 0
            elif words[0] == 'CRYST1':
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Angles missing or unparsable: fall back to 90 degrees
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        # Only close handles this function opened itself
        if own_handle:
            fileobj.close()

    struct.unchange()
    if coordinates:
        # Trailing model that was not terminated by ENDMDL
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError('bad number of atoms in some PQR models')
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3))
    return struct