def load_prmtop(filename):
    """Load an AMBER prmtop topology file from disk.

    Parameters
    ----------
    filename : str
        Path to the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        The resulting topology, as an md.Topology object.

    Raises
    ------
    ValueError
        If a bond record contains a negative atom pointer.

    Notes
    -----
    Deprecated fields in the prmtop file are not loaded. This includes the
    BOX dimensions, which should be stored in trajectory files instead of the
    prmtop for systems with periodic boundary conditions. Because '.binpos'
    files do not store box dimensions, this means that unitcell information
    will be lost if you use .binpos + .prmtop files with MDTraj.

    ``%VERSION`` and ``%COMMENT`` lines are skipped; they carry no topology
    data. All atoms are placed in a single chain.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    flags = []        # %FLAG names in file order; flags[-1] is the open one
    raw_format = {}   # flag -> (format text, count, type, width, precision)
    raw_data = {}     # flag -> list of stripped items (a str for TITLE)

    with open(filename, 'r') as f:
        for line in f:
            if line.startswith(('%VERSION', '%COMMENT')):
                # Neither line kind is needed to build the topology, and a
                # %COMMENT must never be chopped up as section data.
                continue

            elif line.startswith('%FLAG'):
                _, flag = line.rstrip().split(None, 1)
                flags.append(flag)
                raw_data[flag] = []

            elif line.startswith('%FORMAT'):
                stripped = line.rstrip()
                # Keep only the text between the parentheses
                fmt = stripped[stripped.index('(') + 1:stripped.index(')')]
                m = FORMAT_RE_PATTERN.search(fmt)
                raw_format[flags[-1]] = (fmt, m.group(1), m.group(2),
                                         m.group(3), m.group(4))

            elif flags \
                    and 'TITLE' == flags[-1] \
                    and not raw_data['TITLE']:
                # The title is free text, so it is stored whole instead of
                # being split into fixed-width items.
                raw_data['TITLE'] = line.rstrip()

            else:
                # Data line: split it into fixed-width items using the width
                # declared by the %FORMAT line of the open section.
                flag = flags[-1]
                width = int(raw_format[flag][3])
                line = line.rstrip()
                raw_data[flag].extend(
                    line[start:start + width].strip()
                    for start in range(0, len(line), width))

    # Add atoms to the topology
    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    residue_renames = pdb.PDBTrajectoryFile._residueNameReplacements
    atom_rename_tables = pdb.PDBTrajectoryFile._atomNameReplacements
    c = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # RESIDUE_POINTER gives the first atom of every residue; subtract one to
    # compare against the zero-based atom index used below. n_atoms is
    # appended as a sentinel closing the last residue.
    first_atom = [int(p) - 1 for p in raw_data['RESIDUE_POINTER']]
    first_atom.append(n_atoms)

    has_atomic_numbers = 'ATOMIC_NUMBER' in raw_data
    previous_residue = None
    res = 0
    for index in range(n_atoms):
        # Advance to the residue that owns this atom
        while first_atom[res + 1] <= index:
            res += 1

        if res != previous_residue:
            previous_residue = res
            res_name = raw_data['RESIDUE_LABEL'][res].strip()
            if res_name in residue_renames:
                res_name = residue_renames[res_name]
            r = top.add_residue(res_name, c)
            if res_name in atom_rename_tables:
                atom_replacements = atom_rename_tables[res_name]
            else:
                atom_replacements = {}

        atom_name = raw_data['ATOM_NAME'][index].strip()
        if atom_name in atom_replacements:
            atom_name = atom_replacements[atom_name]

        # Get the element from the prmtop file if available
        if has_atomic_numbers:
            try:
                element = elem.Element.getByAtomicNumber(
                    int(raw_data['ATOMIC_NUMBER'][index]))
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name.
            upper = atom_name.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            elif upper.startswith('ZN'):
                element = elem.zinc
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except (KeyError, IndexError):
                    # Unknown symbol, or a blank atom name
                    element = None

        top.add_atom(atom_name, element, r)

    # Add bonds to the topology. Bond records are read three items at a
    # time: the first two are the atom pointers and the third is not used
    # here. Pointers are integer-divided by 3 to get atom indices.
    bond_pointers = (raw_data["BONDS_INC_HYDROGEN"]
                     + raw_data["BONDS_WITHOUT_HYDROGEN"])
    atoms = list(top.atoms)

    for ii in range(0, len(bond_pointers), 3):
        pointer_a = int(bond_pointers[ii])
        pointer_b = int(bond_pointers[ii + 1])
        if pointer_a < 0 or pointer_b < 0:
            raise ValueError("Found negative bonded atom pointers %s"
                             % ((bond_pointers[ii],
                                 bond_pointers[ii + 1]), ))
        top.add_bond(atoms[pointer_a // 3], atoms[pointer_b // 3])

    return top
def load_psf(fname):
    """Load a CHARMM or XPLOR PSF file from disk

    Parameters
    ----------
    fname : str
        Path to the PSF file on disk

    Returns
    -------
    top : md.Topology
        The resulting topology as an md.Topology object

    Notes
    -----
    Only the bond and atom sections are read in. A new chain is started
    every time the segment ID changes from one atom to the next.

    Residue names are mapped through the PDB residue-name replacement table,
    and the atom-name replacements of the resulting residue name are applied
    to every atom of that residue.

    Raises
    ------
    PSFError if any parsing errors occur

    Examples
    --------
    >>> topology = md.load_psf('mysystem.psf')
    >>> # or
    >>> trajectory = md.load('trajectory.dcd', top='system.psf')
    """
    top = topology.Topology()

    with open(fname, 'r') as f:
        line = f.readline()
        if not line.startswith('PSF'):
            raise PSFError('Unrecognized PSF file.')

        # Skip the line after the header, then store the sections in a dict
        f.readline()
        psfsections = dict()
        while True:
            try:
                sec, ptr, data = _parse_psf_section(f)
            except _PSFEOF:
                break
            psfsections[sec] = (ptr, data)
            # We only have to parse up to the NBOND section
            if sec == 'NBOND':
                break

    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    residue_renames = pdb.PDBTrajectoryFile._residueNameReplacements
    atom_rename_tables = pdb.PDBTrajectoryFile._atomNameReplacements

    natom_ptr, atom_lines = psfsections['NATOM']
    natom = _convert(natom_ptr, int, 'natom')

    prev_residue = (None, None, None)
    last_chain = None
    for i in range(natom):
        words = atom_lines[i].split()
        atid = _convert(words[0], int, 'atom index')
        if atid != i + 1:
            raise PSFError('Nonsequential atom indices detected!')
        segid = words[1]
        resid = _convert(words[2], int, 'residue number')
        rname = words[3]
        name = words[4]
        # words[5] (atom type) and words[6] (partial charge) are not used
        mass = _convert(words[7], float, 'atomic mass')

        if last_chain != segid:
            c = top.add_chain()
            last_chain = segid

        # Residues are told apart by the names exactly as written in the file
        curr_residue = (resid, rname, segid)
        if prev_residue != curr_residue:
            prev_residue = curr_residue
            try:
                canonical_rname = residue_renames[rname]
            except KeyError:
                canonical_rname = rname
            r = top.add_residue(canonical_rname, c, resid)
            # Look the atom-name table up once per residue under the
            # replaced name, so that every atom of the residue (not just
            # the first one) gets its name translated.
            try:
                atom_replacements = atom_rename_tables[canonical_rname]
            except KeyError:
                atom_replacements = {}

        try:
            name = atom_replacements[name]
        except KeyError:
            pass

        # Try to guess the element from the atom name for some of the common
        # ions using the names that CHARMM assigns to ions. If it's not one of
        # these 'weird' ion names, look up the element by mass. If the mass is
        # 0, assume a lone pair
        upper = name.upper()
        if upper.startswith('CLA'):
            element = elem.chlorine
        elif upper.startswith('SOD'):
            element = elem.sodium
        elif upper.startswith('POT'):
            element = elem.potassium
        elif upper == 'CAL':
            element = elem.calcium
        elif mass == 0:
            element = None
        else:
            element = elem.Element.getByMass(mass * u.dalton)

        top.add_atom(name, element, r)

    # Add bonds to the topology
    atoms = list(top.atoms)
    nbond_ptr, bond_data = psfsections['NBOND']
    nbond = _convert(nbond_ptr, int, 'number of bonds')
    if len(bond_data) != nbond * 2:
        raise PSFError('Got %d indexes for %d bonds' % (len(bond_data), nbond))
    # bond_data is a flat sequence of atom-number pairs; the numbers are
    # shifted down by one to index into `atoms`.
    for i2 in range(0, nbond * 2, 2):
        top.add_bond(atoms[bond_data[i2] - 1], atoms[bond_data[i2 + 1] - 1])

    return top