def _parse(self, psffile):
    """Parse a psf file into a Molecule.

    The first line of the file is passed to ``self._find_psf_format``;
    when that returns False the 'NAMD' layout is assumed. The file is then
    read line by line: a line containing '!' is treated as a section
    header whose second whitespace-separated token (with ':' stripped)
    selects the section, and every following non-blank line is handed to
    that section's handler until the next header. Lines under an unknown
    header are skipped.

    Args:
        psffile : str, path to the psf file

    Returns:
        a Molecule instance, or False when the file does not exist, the
        detected format is not supported, or a handler rejects a line
    """
    self.lgr.debug("parsing psf file: %s" % psffile)
    t1 = time.time()

    # check if the file exists
    if not os.path.exists(psffile):
        self.lgr.critical("file doesn't exist")
        return False

    # container that the section handlers fill in
    mol = blocks.Molecule()

    # supported formats; each section entry is passed as-is to its 'func'
    psf_formats = {
        'NAMD': {
            'sections': {
                '!NATOM': {
                    'type': 'atom',
                    'n': (9, 11),
                    'multiple': False,
                    'func': self._atom_line,
                },
                '!NBOND': {
                    'type': 'bond',
                    'n': 2,
                    'multiple': True,
                    'func': self._badi_line,
                },
                '!NTHETA': {
                    'type': 'angle',
                    'n': 3,
                    'multiple': True,
                    'func': self._badi_line,
                },
                '!NPHI': {
                    'type': 'dihedral',
                    'n': 4,
                    'multiple': True,
                    'func': self._badi_line,
                },
                '!NIMPHI': {
                    'type': 'improper',
                    'n': 4,
                    'multiple': True,
                    'func': self._badi_line,
                },
                '!NCRTERM': {
                    'type': 'cmap',
                    'n': 8,
                    'multiple': False,
                    'func': self._badi_line,
                },
            },
        },
    }

    # find the psf format from the first line only.
    # The default covers an empty file, where the loop body never runs;
    # such a file then takes the same 'NAMD' fallback as an unrecognised
    # first line instead of failing on an unbound name.
    psffmt = False
    with open(psffile) as f:
        for line in f:
            psffmt = self._find_psf_format(line)
            break

    # check if the format is valid
    if psffmt is False:
        # assume the format is 'NAMD'
        psffmt = 'NAMD'
    elif psffmt not in psf_formats:
        self.lgr.error("psf format '%s' is not supported" % (psffmt))
        return False

    # parse the file
    sections = psf_formats[psffmt]['sections']
    _sec = None  # key of the section being read, None while skipping
    with open(psffile) as f:
        for line in f:
            line = line.strip()
            if line == '':
                continue

            if '!' in line:
                # section header: the label is the second token.
                # A header with a single token has no label and is
                # skipped like any unknown section.
                fields = line.split()
                label = fields[1].strip(':') if len(fields) > 1 else None
                if label in sections:
                    _sec = label
                else:
                    self.lgr.debug("skipping section: '%s'" % line)
                    _sec = None
                continue

            if _sec is not None:
                _conf = sections[_sec]
                result = _conf['func'](psffmt, line, _conf, mol)
                if result is False:
                    self.lgr.error(
                        "couldn't parse this line in '%s' section:\n %s" %
                        (_sec, line))
                    return False

    # build chain and residues
    build_res_chain(mol)
    build_pairs(mol, 'charmm')

    t2 = time.time()
    self.lgr.debug("parsing took %4.1f seconds" % (t2 - t1))
    return mol
def split_psf(self):
    """Split the single parsed psf Molecule into one Molecule per chain.

    Using this function only makes sense if the segments in the PSF file
    are not covalently bonded to each other: a bond, angle, dihedral,
    improper, cmap or pair is copied into a chain's Molecule only when all
    of its atoms belong to that chain, and the closing assertions fail if
    any entity of the original Molecule was not copied.

    Reads ``self.molecules`` (which must hold exactly one Molecule) and
    replaces it with a tuple holding one Molecule per chain.

    Returns:
        False when the chain names are not unique, otherwise None
    """
    assert len(self.molecules) == 1
    source = self.molecules[0]
    self.lgr.debug("converting psf to multiple molecules based on chains")

    names = {chain.name for chain in source.chains}
    if len(names) != len(source.chains):
        self.lgr.error("the name of the chains is not unique")
        return False

    # Atom -> chain name, captured before build_res_chain runs on the
    # per-chain Molecules below
    chain_of = {atom: atom.residue.chain.name for atom in source.atoms}

    def inside(entity, width, name):
        # True when atom1..atom<width> of the entity all sit in chain
        # `name`; stops at the first atom that does not
        return all(chain_of[getattr(entity, 'atom%d' % k)] == name
                   for k in range(1, width + 1))

    # (Molecule attribute, number of atoms per entity)
    kinds = (
        ('bonds', 2),
        ('angles', 3),
        ('dihedrals', 4),
        ('impropers', 4),
        ('cmaps', 8),
        ('pairs', 2),
    )

    # how many atoms / entities ended up in some chain
    n_atoms = 0
    copied = {attr: 0 for attr, _ in kinds}

    parts = []
    for chain in source.chains:
        part = blocks.Molecule()
        name = chain.name

        for res in chain.residues:
            for atom in res.atoms:
                part.atoms.append(atom)
                n_atoms += 1

        for attr, width in kinds:
            for entity in getattr(source, attr):
                if inside(entity, width, name):
                    getattr(part, attr).append(entity)
                    copied[attr] += 1

        build_res_chain(part)
        part.renumber_atoms()
        parts.append(part)

    # make sure we used all the entities in the original molecule
    assert len(source.atoms) == n_atoms
    assert len(source.bonds) == copied['bonds'], '%d != %d' % (
        len(source.bonds), copied['bonds'])
    assert len(source.angles) == copied['angles']
    assert len(source.dihedrals) == copied['dihedrals']
    assert len(source.impropers) == copied['impropers']
    assert len(source.cmaps) == copied['cmaps']
    assert len(source.pairs) == copied['pairs']

    self.molecules = tuple(parts)
def _parse(self, psffile):
    """Parse a psf file.

    The format is detected from the first line via
    ``self._find_psf_format``; a False result falls back to 'NAMD'. The
    file is then scanned again: any line containing '!' is a section
    header, identified by its second whitespace-separated token with ':'
    stripped, and the non-blank lines after it go to that section's
    handler. Sections that are not listed for the format are skipped.

    Args:
        psffile : str, path to the psf file

    Returns:
        a Molecule instance, or False if the file is missing, the format
        is unsupported, or a section handler returns False for a line
    """
    self.lgr.debug("parsing psf file: %s" % psffile)
    t1 = time.time()

    # check if the file exists
    if not os.path.exists(psffile):
        self.lgr.critical("file doesn't exist")
        return False

    # the Molecule that the section handlers populate
    mol = blocks.Molecule()

    # supported formats; the whole section entry is given to its 'func'
    psf_formats = {
        'NAMD': {
            'sections': {
                '!NATOM':   {'type': 'atom',     'n': (9, 11), 'multiple': False, 'func': self._atom_line},
                '!NBOND':   {'type': 'bond',     'n': 2,       'multiple': True,  'func': self._badi_line},
                '!NTHETA':  {'type': 'angle',    'n': 3,       'multiple': True,  'func': self._badi_line},
                '!NPHI':    {'type': 'dihedral', 'n': 4,       'multiple': True,  'func': self._badi_line},
                '!NIMPHI':  {'type': 'improper', 'n': 4,       'multiple': True,  'func': self._badi_line},
                '!NCRTERM': {'type': 'cmap',     'n': 8,       'multiple': False, 'func': self._badi_line},
            },
        },
    }

    # find the psf format: only the first line is inspected
    with open(psffile) as f:
        first_line = next(f, None)

    if first_line is None:
        # empty file: nothing to detect, so use the same fallback as an
        # unrecognised first line rather than leaving psffmt unbound
        psffmt = False
    else:
        psffmt = self._find_psf_format(first_line)

    # check if the format is valid
    if psffmt is False:
        # assume the format is 'NAMD'
        psffmt = 'NAMD'
    elif psffmt not in psf_formats:
        self.lgr.error("psf format '%s' is not supported" % (psffmt))
        return False

    # parse the file
    known_sections = psf_formats[psffmt]['sections']
    _sec = None  # current section key; None means "ignore data lines"
    with open(psffile) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue

            if '!' in line:
                tokens = line.split()
                # a header made of a single token carries no label in
                # the expected position; treat it as an unknown section
                name = tokens[1].strip(':') if len(tokens) >= 2 else None
                if name in known_sections:
                    _sec = name
                else:
                    self.lgr.debug("skipping section: '%s'" % line)
                    _sec = None
                continue

            if _sec is None:
                continue

            _conf = known_sections[_sec]
            if _conf['func'](psffmt, line, _conf, mol) is False:
                self.lgr.error("couldn't parse this line in '%s' section:\n %s" % (_sec, line))
                return False

    # build chain and residues
    build_res_chain(mol)
    build_pairs(mol, 'charmm')

    t2 = time.time()
    self.lgr.debug("parsing took %4.1f seconds" % (t2-t1))
    return mol
def split_psf(self):
    """Convert the one psf Molecule in ``self.molecules`` to one per chain.

    This is only sensible when no covalent connection crosses a chain
    boundary: each bond, angle, dihedral, improper, cmap and pair is kept
    only if every one of its atoms is in the same chain, and the final
    assertions check that nothing from the original Molecule was dropped.

    On success ``self.molecules`` becomes a tuple of the new Molecules.

    Returns:
        False if two chains share a name; None otherwise
    """
    assert len(self.molecules) == 1
    whole = self.molecules[0]
    self.lgr.debug("converting psf to multiple molecules based on chains")

    if len({c.name for c in whole.chains}) != len(whole.chains):
        self.lgr.error("the name of the chains is not unique")
        return False

    # Atom:Chain.name lookup, built once up front
    owner = {}
    for atom in whole.atoms:
        owner[atom] = atom.residue.chain.name

    slots = ('atom1', 'atom2', 'atom3', 'atom4',
             'atom5', 'atom6', 'atom7', 'atom8')

    def owned_by(entity, width, name):
        # check the first `width` atom slots in order, bailing out at the
        # first atom that lives in another chain
        for slot in slots[:width]:
            if owner[getattr(entity, slot)] != name:
                return False
        return True

    # counters for the different elements copied out of the psf Molecule
    n_atoms = n_bonds = n_angles = n_dihedrals = 0
    n_impropers = n_cmaps = n_pairs = 0

    pieces = []
    for chain in whole.chains:
        piece = blocks.Molecule()
        name = chain.name

        for res in chain.residues:
            for atom in res.atoms:
                piece.atoms.append(atom)
                n_atoms += 1

        for bond in whole.bonds:
            if owned_by(bond, 2, name):
                piece.bonds.append(bond)
                n_bonds += 1

        for angle in whole.angles:
            if owned_by(angle, 3, name):
                piece.angles.append(angle)
                n_angles += 1

        for dihedral in whole.dihedrals:
            if owned_by(dihedral, 4, name):
                piece.dihedrals.append(dihedral)
                n_dihedrals += 1

        for improper in whole.impropers:
            if owned_by(improper, 4, name):
                piece.impropers.append(improper)
                n_impropers += 1

        for cmap in whole.cmaps:
            if owned_by(cmap, 8, name):
                piece.cmaps.append(cmap)
                n_cmaps += 1

        for pair in whole.pairs:
            if owned_by(pair, 2, name):
                piece.pairs.append(pair)
                n_pairs += 1

        build_res_chain(piece)
        piece.renumber_atoms()
        pieces.append(piece)

    # make sure every entity of the original molecule was used
    assert len(whole.atoms) == n_atoms
    assert len(whole.bonds) == n_bonds, '%d != %d' % (len(whole.bonds), n_bonds)
    assert len(whole.angles) == n_angles
    assert len(whole.dihedrals) == n_dihedrals
    assert len(whole.impropers) == n_impropers
    assert len(whole.cmaps) == n_cmaps
    assert len(whole.pairs) == n_pairs

    self.molecules = tuple(pieces)
def _parse(self, pdbfile, guess_mols):
    """Read the ATOM/HETATM records of a pdb file.

    Records seen before the first ENDMDL create the Atom objects. Records
    after it only add one more coordinate triple to the atoms that
    already exist, matched by their order of appearance within each model.

    On success sets ``self.atoms`` (tuple of Atoms) and
    ``self.molecules`` (tuple of Molecules). Neither is set when the file
    is missing or no atom record was found.

    Args:
        pdbfile : str, path to the pdb file
        guess_mols : when true, a new Molecule is started at a HETATM
            record whose residue name or number differs from the previous
            atom's, and at an ATOM record that follows a HETATM record;
            otherwise every atom goes into a single Molecule

    Returns:
        None
    """
    self.lgr.debug("parsing pdb file: %s" % pdbfile)
    t1 = time.time()

    if not os.path.exists(pdbfile):
        self.lgr.error("the pdbfile doesn't exist")
        return

    def read_xyz(record):
        # fixed-column x, y, z fields of an ATOM/HETATM record
        return tuple(map(float, (record[30:38], record[38:46], record[46:54])))

    _first_model_finished = False  # set by the first ENDMDL
    atoms = []
    _i = 0  # index of the next atom within the current later model
    _alt_loc_warning = False

    # read the file and create atoms list
    M = blocks.Molecule()
    molecules = [M]

    with open(pdbfile) as f:
        for line in f:
            line = line.strip()

            if line.startswith('ENDMDL'):
                _first_model_finished = True
                _i = 0  # the next model starts again at the first atom

            if not line.startswith(('ATOM', 'HETATM')):
                continue

            if _first_model_finished:
                # later models: just read the coordinates. Stored as a
                # tuple so every model's entry has the same type as the
                # first model's. A model with more atom records than the
                # first one raises IndexError here.
                atoms[_i].coords.append(read_xyz(line))
                _i += 1
                continue

            a = blocks.Atom()
            a.flag = line[0:6].strip()
            a.number = self.conv_atom_number(line[6:11])
            a.name = line[12:16].strip()
            a.altloc = line[16].strip()
            if a.altloc != '':
                _alt_loc_warning = True
            a.resname = line[17:21].strip()
            a.chain = line[21].strip()
            a.resnumb = int(line[22:26])
            # a list of (x,y,z) tuples, each tuple for one model
            a.coords = [read_xyz(line)]
            # TODO occup, bfactor, ...

            if guess_mols and atoms:
                prev = atoms[-1]
                if line.startswith('HETATM'):
                    starts_new = (a.resname != prev.resname
                                  or a.resnumb != prev.resnumb)
                else:  # ATOM
                    starts_new = prev.flag == 'HETATM'
                if starts_new:
                    M = blocks.Molecule()
                    molecules.append(M)

            # record a in the local atoms and M.atoms
            atoms.append(a)
            M.atoms.append(a)

    if not atoms:
        self.lgr.warning("no atoms were found in the pdb file")
        return

    self.atoms = tuple(atoms)

    if _alt_loc_warning:
        self.lgr.warning("there are atom records with altloc flags - fix this")

    # make sure all the atomic coordinates are the same length
    n_models = len(atoms[0].coords)
    for a in atoms:
        assert len(a.coords) == n_models

    # build residue and chains
    for m in molecules:
        build_res_chain(m)

    self.molecules = tuple(molecules)

    t2 = time.time()
    self.lgr.debug("parsing took %4.1f seconds" % (t2-t1))
def _parse(self, pdbfile, guess_mols):
    """Parse a pdb file into atoms and molecules.

    ATOM and HETATM records up to the first ENDMDL each produce an Atom.
    After the first ENDMDL, ATOM/HETATM records are only used to append a
    further coordinate triple to the existing atoms, in file order, with
    the position counter reset at every ENDMDL.

    The results are stored in ``self.atoms`` and ``self.molecules`` (both
    tuples). They are left untouched if the file does not exist or holds
    no atom records.

    Args:
        pdbfile : str, path to the pdb file
        guess_mols : if true, split the atoms into several Molecules: a
            HETATM record opens a new Molecule when its residue name or
            number differs from the previous atom's, and an ATOM record
            opens one when the previous atom was a HETATM. If false, all
            atoms share one Molecule.

    Returns:
        None
    """
    self.lgr.debug("parsing pdb file: %s" % pdbfile)
    t1 = time.time()

    if not os.path.exists(pdbfile):
        self.lgr.error("the pdbfile doesn't exist")
        return

    _first_model_finished = False
    atoms = []
    molecules = []
    _i = 0  # a counter for atom index
    _alt_loc_warning = False

    # read the file and create atoms list
    M = blocks.Molecule()
    molecules.append(M)

    atom_records = ('ATOM', 'HETATM')

    with open(pdbfile) as f:
        for line in f:
            line = line.strip()
            is_atom_record = line.startswith(atom_records)

            if line.startswith('ENDMDL'):
                _first_model_finished = True  # set by first ENDMDL
                _i = 0  # reset _i

            if _first_model_finished:
                # just read the coordinates
                if is_atom_record:
                    # kept as a tuple, like the first model's entry, so
                    # coords holds one (x,y,z) tuple per model throughout
                    c = tuple(
                        map(float, (line[30:38], line[38:46], line[46:54])))
                    atoms[_i].coords.append(c)
                    _i += 1

            elif is_atom_record:
                a = blocks.Atom()
                a.flag = line[0:6].strip()
                a.number = self.conv_atom_number(line[6:11])
                a.name = line[12:16].strip()
                a.altloc = line[16].strip()
                if a.altloc != '' and _alt_loc_warning is False:
                    _alt_loc_warning = True
                a.resname = line[17:21].strip()
                a.chain = line[21].strip()
                a.resnumb = int(line[22:26])

                c = tuple(
                    map(float, (line[30:38], line[38:46], line[46:54])))
                # a list of (x,y,z) tuples, each tuple for one model
                a.coords = [c]
                # TODO occup, bfactor, ...

                if guess_mols and len(atoms) > 0:
                    last = atoms[-1]
                    if line.startswith('HETATM'):
                        if (a.resname != last.resname
                                or a.resnumb != last.resnumb):
                            M = blocks.Molecule()
                            molecules.append(M)
                    elif last.flag == 'HETATM':
                        # ATOM record right after a HETATM record
                        M = blocks.Molecule()
                        molecules.append(M)

                # record a in the local atoms and M.atoms
                atoms.append(a)
                M.atoms.append(a)

    if len(atoms) == 0:
        self.lgr.warning("no atoms were found in the pdb file")
        return

    self.atoms = tuple(atoms)

    if _alt_loc_warning:
        self.lgr.warning(
            "there are atom records with altloc flags - fix this")

    # make sure all the atomic coordinates are the same length
    for a in atoms:
        assert len(a.coords) == len(atoms[0].coords)

    # build residue and chains
    for m in molecules:
        build_res_chain(m)

    self.molecules = tuple(molecules)

    t2 = time.time()
    self.lgr.debug("parsing took %4.1f seconds" % (t2 - t1))