def __init__(self):
    """
    Create a new empty Model instance. Don't use this constructor directly.
    """
    #: starting (atom) position of each residue
    self._resIndex = None
    #: starting position of each chain
    self._chainIndex = None

    #: values associated with atoms
    self.atoms = B.ProfileCollection()
    #: values associated with residues
    self.residues = B.ProfileCollection()
    #: values associated with chains or molecules
    self.chains = B.ProfileCollection()

    for key in self.ATOM_KEYS:
        self.atoms.set(key, [], asarray=False)
    for key in self.RESIDUE_KEYS:
        self.residues.set(key, [], asarray=False)
    for key in self.CHAIN_KEYS:
        self.chains.set(key, [], asarray=False)

    #: Meta info
    self.info = {'date': T.dateSortString()}
    self.__version__ = self.version()
def getProteinAtomsAroundHotspot(self, hotspot, pdb, fromCentroid=True,
                                 radius=6, includeH=True, returnmask=False):
    """
    Get pdb indices for the atoms neighbouring the hotspot within a given
    distance. The distance is calculated either from the hotspot centroid
    or considering all points inside the hotspot, depending on the value of
    *fromCentroid*. Hydrogens can optionally be included (default) or
    removed from the returned list.

    Args:
        hotspot (HotSpot or HotSpotSet)     Fetch atoms around this hotspot
        pdb (PDBModel or string)            What PDB to return atoms from. Should be a
                                            Biskit.PDBModel or a string pointing to a valid
                                            pdb file to read.
        fromCentroid (bool)                 Calculate from the centroid point or considering
                                            all points in the hotspot?
        radius (float)                      Radius within which we consider an atom a neighbour
        includeH (bool)                     Include hydrogens in returned indices?
        returnmask (bool)                   Return a mask instead of a list of indices?

    Returns:
        indicesList (list of ints)          Indices in the pdb corresponding to neighbouring atoms
        or
        indicesMask (npy.ndarray of bools)  Mask for the PDBModel
    """
    import Biskit as bi
    import numpy as npy
    from scipy.spatial import KDTree

    # Load PDB if string
    # and get hydrogens mask
    if isinstance(pdb, str):
        pdb = bi.PDBModel(pdb)
    hids = npy.where(pdb.maskH())[0]

    # First construct a KDTree with protein coordinates
    tree = KDTree(pdb.xyz)

    # Find neighbours.
    # Check first if the hotspot has multiple coords or just one.
    if hotspot.coordList.ndim == 1:
        dims = 1
    else:
        dims = 2

    if fromCentroid or dims == 1:
        # Calculate neighbours to one point
        rawids = tree.query_ball_point(hotspot.coord, radius)
    else:
        # Do a search for each point and collect a unique set of neighbours.
        # query_ball_point returns one list per query point here, so flatten
        # it into a set.
        perpoint = tree.query_ball_point(hotspot.coordList, radius)
        rawids = set()
        for idlist in perpoint:
            rawids.update(idlist)

    if not includeH:
        ids = [ri for ri in rawids if ri not in hids]  # drop hydrogen atoms
    else:
        ids = list(rawids)

    if returnmask:
        return npy.array([i in ids for i in range(len(pdb.xyz))])
    else:
        return ids
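# Usage sketch (assuming `hs` is a HotSpot with .coord/.coordList populated
# and `project` the instance providing this method; names are illustrative):
#
#   ids = project.getProteinAtomsAroundHotspot(hs, '1yer.pdb', radius=4.5,
#                                              fromCentroid=False,
#                                              includeH=False)
#   mask = project.getProteinAtomsAroundHotspot(hs, '1yer.pdb',
#                                               returnmask=True)
#   pocket = bi.PDBModel('1yer.pdb').compress(mask)  # neighbouring atoms only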
def test_autoprepare_2(self):
    """Test main AutoPrepare class. Prepare PDBID 1yer chains A, B == [0,1]"""
    f_check = T.testRoot('autoprepare', '1yer_h.pdb')
    self.prepare = AutoPrepare(pdbid='1yer', chains=[0, 1])
    self.checkpdb = bi.PDBModel(f_check)
    # self.savePdb(osp.join(T.testRoot(), '1a0o_prepared.pdb'))
    self.comparePDB(self.prepare.getPdb(), self.checkpdb)
def fetchPDB(self, id):
    try:
        h = self.getLocalPDBHandle(id)
    except:
        h = self.getRemotePDBHandle(id)

    fname = tempfile.mktemp('.pdb', 'ncbiparser_')

    lines, infos = self.parsePdbFromHandle(h, first_model_only=1)

    ## close if it is a handle
    try:
        h.close()
    except:
        pass

    f = open(fname, 'w', 1)
    f.writelines(lines)
    f.close()

    m = B.PDBModel(fname)
    m.disconnect()
    m.pdbCode = id
    m.info.update(infos)

    T.tryRemove(fname)

    return m
def test_autoprepare_1(self):
    """Test main AutoPrepare class. Prepare existing pdb file."""
    f_in = T.testRoot('autoprepare', '1yer.pdb')
    f_check = T.testRoot('autoprepare', '1yer_h.pdb')
    self.prepare = AutoPrepare(pdb=f_in, protonate=True)
    self.checkpdb = bi.PDBModel(f_check)
    self.comparePDB(self.prepare.getPdb(), self.checkpdb)
def test_PQRParser(self):
    """PQRParser test. Read a PQR file"""
    f_in = T.testRoot('autoprepare', '1yer.pqr')
    f_check = T.testRoot('autoprepare', '1yer_h.pdb')
    self.checkpdb = bi.PDBModel(f_check)
    self.pqr = PQRParseFile(f_in).getModel()
    # self.pqr.writePdb(f_check)
    self.comparePDB(self.pqr, self.checkpdb)
def test_ReadNAMDDCD(self):
    """Read DCD trajectory"""
    dcd = T.testRoot('namd', 'min.dcd')
    pdb = bi.PDBModel(T.testRoot('namd', 'pep_WAT_WAT_1.pdb'))
    try:
        traj = TrajFile(dcd, pdb)
    except:
        traj = False
    self.assertTrue(traj)
def test_readAmberAscii(self):
    """Read Amber Ascii trajectory"""
    x = T.testRoot('amber', 'traj.x')
    pdb = bi.PDBModel(T.testRoot('amber', 'pep_WAT_WAT_1.pdb'))
    try:
        traj = TrajFile(x, pdb)
    except:
        traj = False
    self.assertTrue(traj)
def test_readAmberNetCDF(self):
    """Read Amber NetCDF trajectory"""
    nc = T.testRoot('amber', 'traj.nc')
    pdb = bi.PDBModel(T.testRoot('amber', 'pep_WAT_WAT_1.pdb'))
    try:
        traj = TrajFile(nc, pdb)
    except:
        traj = False
    self.assertTrue(traj)
def setPdb(self, pdb):
    """
    Set PDB to prepare.

    :arg str|PDBModel pdb: PDB file path or PDBModel.
    :raises AutoPrepareError: If argument of wrong type.
    """
    if isinstance(pdb, str) and osp.exists(pdb):
        self.pdb = bi.PDBModel(pdb)
    elif isinstance(pdb, bi.PDBModel):
        self.pdb = pdb
    else:
        raise AutoPrepareError, "setPdb argument should be a pdb filepath or a PDBModel"
def capNME(self, model, chain):
    """
    Cap C-terminal of given chain.

    :arg model: model
    :type model: PDBModel
    :arg chain: index of chain to be capped
    :type chain: int
    """
    self.log.add('Capping C-terminal of chain %i.' % chain)
    m_nme = bi.PDBModel(self.F_nme_cap)

    chains_before = None
    chains_after = None
    if chain > 0:
        chains_before = model.takeChains(range(chain), breaks=1)
    if chain < model.lenChains(1) - 1:
        chains_after = model.takeChains(range(chain + 1, model.lenChains(1)),
                                        breaks=1)

    m_chain = model.takeChains([chain], breaks=1)
    m_term = m_chain.resModels()[-1]

    ## rename overhanging residue in cap PDB, renumber cap residue
    for a in m_nme:
        if a['residue_name'] != 'NME':
            a['residue_name'] = m_term.atoms['residue_name'][0]
        else:
            a['residue_number'] = m_term.atoms['residue_number'][0] + 1
        a['chain_id'] = m_term.atoms['chain_id'][0]
        a['segment_id'] = m_term.atoms['segment_id'][0]

    ## chain should not have any terminal O after capping
    m_chain.remove(['OXT'])

    ## fit cap onto last residue of chain
    m_nme = m_nme.magicFit(m_term)

    ## concat cap on chain
    m_chain = m_chain.concat(m_nme.resModels()[-1])
    m_chain.mergeChains(0, renumberAtoms=True)

    ## should be obsolete now
    if getattr(m_chain, '_PDBModel__terAtoms', []) != []:
        m_chain._PDBModel__terAtoms = [len(m_chain) - 1]

    ## re-assemble whole model
    assemble_list = []
    if chains_before:
        assemble_list.append(chains_before)
    assemble_list.append(m_chain)
    if chains_after:
        assemble_list.append(chains_after)

    if len(assemble_list) > 1:
        return assemble_list[0].concat(*assemble_list[1:])
    else:
        return assemble_list[0]
def test_PDBParseModel(self):
    """PDBParseModel test"""
    ## loading output file from X-plor
    if self.local:
        print 'Loading pdb file ..'

    self.p = PDBParseModel()
    self.m = self.p.parse2new(B.PDBModel(T.testRoot() + '/rec/1A2P.pdb'))

    self.assertAlmostEqual(N.sum(self.m.centerOfMass()), 113.682601929, 2)
def capACE(self, model, chain):
    """
    Cap N-terminal of given chain.

    :arg model: model
    :type model: PDBModel
    :arg chain: index of chain to be capped
    :type chain: int
    """
    self.log.add('Capping N-terminal of chain %i.' % chain)
    m_ace = bi.PDBModel(self.F_ace_cap)

    chains_before = None
    chains_after = None
    if chain > 0:
        chains_before = model.takeChains(range(chain), breaks=1)
    if chain < model.lenChains(1) - 1:
        chains_after = model.takeChains(range(chain + 1, model.lenChains(1)),
                                        breaks=1)

    m_chain = model.takeChains([chain], breaks=1)
    m_term = m_chain.resModels()[0]

    ## we need 3 atoms for superposition, CB might mess things up but
    ## could help if there is no HN
    if 'HN' in m_term.atomNames():
        m_ace.remove(['CB'])

    ## rename overhanging residue in cap PDB
    for a in m_ace:
        if a['residue_name'] != 'ACE':
            a['residue_name'] = m_term.atoms['residue_name'][0]
        else:
            a['residue_number'] = m_term.atoms['residue_number'][0] - 1
        a['chain_id'] = m_term.atoms['chain_id'][0]
        a['segment_id'] = m_term.atoms['segment_id'][0]

    ## fit cap onto first residue of chain
    m_ace = m_ace.magicFit(m_term)

    ## concat cap on chain and merge chains
    m_chain = m_ace.resModels()[0].concat(m_chain)
    m_chain.mergeChains(0, renumberAtoms=True)

    ## re-assemble whole model
    assemble_list = []
    if chains_before:
        assemble_list.append(chains_before)
    assemble_list.append(m_chain)
    if chains_after:
        assemble_list.append(chains_after)

    if len(assemble_list) > 1:
        return assemble_list[0].concat(*assemble_list[1:])
    else:
        return assemble_list[0]
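# A sketch of capping both termini of the first chain (assuming `cleaner`
# is the instance providing capACE/capNME; the file name is illustrative):
#
#   m = bi.PDBModel('protein.pdb')
#   m = cleaner.capACE(m, 0)    # acetyl cap on the N-terminus of chain 0
#   m = cleaner.capNME(m, 0)    # N-methyl amide cap on the C-terminus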
def __createRef(self):
    """
    Create reference pdb and store it in self.ref
    """
    import Biskit as bi

    # Initiate AmberCreateSystem with loaded AmberOFF
    self.__initCreate()
    self.create.saveAmberParm(self.unitName, 'tmp.top', 'tmp.crd')
    self.create.ambpdb('tmp.top', 'tmp.crd', 'tmp.pdb')

    self.ref = bi.PDBModel('tmp.pdb')
    self.ref.source.set_string("System %s reference" % self.name)

    # Remove tmp files
    for ext in ('top', 'crd', 'pdb'):
        os.remove('tmp.' + ext)
    self.__cleanCreate()
def __calcGridDimensionsAndOrigin(self):
    "From reference PDB calculate dimensions of the grid to be calculated and the origin"
    refpdb = bi.PDBModel(osp.join(self.replica.path, self.replica.ref))
    maxdist = npy.sqrt(((refpdb.xyz.max(axis=0) - refpdb.xyz.min(axis=0))**2).sum())
    dimensions = npy.round(maxdist)
    origin = refpdb.xyz.mean(axis=0) - (dimensions / 2.)

    # Move origin to the nearest half integer: truncate, then add 0.5 on
    # the axes whose fractional part is >= 0.5
    mask = npy.abs(origin - npy.trunc(origin)) < 0.5  # True when fractional part < 0.5
    origin = npy.trunc(origin)
    origin[mask == 0] += 0.5

    return dimensions, origin
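# Worked example of the half-integer rounding above (made-up values):
# origin = [-21.3, 10.7, 4.2] truncates to [-21., 10., 4.]; the fractional
# parts are [0.3, 0.7, 0.2], so only the middle axis (>= 0.5) gets +0.5,
# yielding [-21., 10.5, 4.].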
def loadFile(self):
    if self.extension in ('x', 'x.gz'):
        self.traj = bi.AmberCrdParser(self.fname, self.pdb, 1)
        self.traj.crd.readline()    # skip first line
        self.nextFunction = self.traj.nextFrame

    elif self.extension in ('nc', 'netcdf'):
        try:
            import scipy.io.netcdf as ncdf
            self.traj = ncdf.NetCDFFile(self.fname, 'r').variables['coordinates']
            self.nextFunction = self.returnFrameFromNetcdf
        except ImportError:
            raise TrajFileError, "Can't read NetCDF trajectory"

    elif self.extension in ('dcd',):
        from NamdDCDParser import NamdDCDParser
        self.traj = NamdDCDParser(self.fname, self.pdb, box=1)
        self.nextFunction = self.traj.read_dcdstep
def getEnergyFromPDBCoords(grid, pdbfile, forceradius=0, temp=300.):
    """
    Use a PDB file to define the coordinates and radii (in angstroms) at
    which energies are extracted from grid *grid*. The radius is read from
    the b-factor column of the PDB. A radius of **zero** is treated as a
    point energy. If *forceradius* is set to a non-zero value, it overrides
    the b-factor column.
    """
    import Biskit as bi

    if osp.exists(pdbfile):
        pdb = bi.PDBModel(pdbfile)
    else:
        raise BadFile, "File name %s not found." % pdbfile

    # Collect coordinates and radii
    coords = pdb.xyz
    if forceradius:
        rads = forceradius
    else:
        rads = pdb['temperature_factor']

    # Fetch results
    return getEnergyValues(grid, coords, rads, temp=temp)
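# Usage sketch (assuming `g` is a grid object accepted by getEnergyValues;
# the file name and radius are illustrative):
#
#   e = getEnergyFromPDBCoords(g, 'probes.pdb', forceradius=1.4)
#   # energies evaluated at every atom position of probes.pdb, using a
#   # fixed 1.4 A radius instead of the b-factor column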
def __mapfacets2Atoms(self, raw_table):
    """
    Create an atom profile 'facets' that contains the facet dictionaries
    each atom is involved in (empty list if the atom is not part of the
    interface). The entries of the profile are Biskit.DictList objects that
    function like normal lists but have additional functions to directly
    extract values of the dictionaries in the list.
    """
    facet_profile = [B.DictList() for i in self.model.atomRange()]

    for facet in raw_table.iterDicts():
        a1 = facet['serial_1']
        a2 = facet['serial_2']

        facet_profile[a1] += [facet]
        facet_profile[a2] += [facet]

    self.model.atoms.set('facets', facet_profile,
                         comment='intervor facets this atom is involved in')

    self.model['n_facets'] = [len(f or []) for f in facet_profile]
    self.model['n_facets', 'comment'] = 'number of interface facets'

    self.model['so_min'] = [min(f.valuesOf('so') or [0]) for f in facet_profile]
    self.model['so_max'] = [max(f.valuesOf('so') or [0]) for f in facet_profile]
    self.model['facet_area'] = [N.sum(f.valuesOf('area') or [0])
                                for f in facet_profile]

    self.model['so_min', 'comment'] = 'smallest Intervor shelling order of '\
        + 'any facet this atom is involved in'
    self.model['so_max', 'comment'] = 'highest Intervor shelling order of '\
        + 'any facet this atom is involved in'
    self.model['facet_area', 'comment'] = 'sum of area of all intervor facets'\
        + ' this atom is involved in'
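# Reading the per-atom interface annotation back (a sketch, assuming `iv`
# is the finished Intervor instance):
#
#   interface = iv.model.compress(N.array(iv.model['n_facets']) > 0)
#   areas = iv.model['facet_area']           # one value per atom
#   facets0 = iv.model.atoms['facets'][0]    # DictList of facet records
#   so_values = facets0.valuesOf('so')       # shelling orders of those facets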
def __init__(self, model, map2model, add2model=True):
    """
    Create a new feature for a given Polymer.

    @param model: existing Polymer
    @type model: Polymer
    @param map2model: map feature positions to positions in the model
    @type map2model: [ int ]
    @param add2model: register the new feature with the parent model [True]
    @type add2model: bool
    """
    assert isinstance(model, Polymer)

    self.model = model
    self.map = N.array(map2model, 'i')

    self.atoms = B.ProfileMirror(self.model.atoms, self.map)

    if add2model:
        self.model.addFeature(self)
def __init__(self, pdb=False, chains=[], protonate=False, pdbid=None,
             *args, **kwargs):
    "pdb is initial pdbmodel to work with (optional)"
    if pdb:
        self.pdb = bi.PDBModel(pdb)
    else:
        self.pdb = False
    self.pdbid = pdbid
    self.chains = chains

    # If PDBID requested, always execute protonate
    if pdbid:
        self.fetchPDBandProtonate(pdbid, **kwargs)
    elif self.pdb:
        if protonate:
            self.protonatePDB(pdb=self.pdb, **kwargs)

    # Finally do cleaning if self.pdb was set
    if self.pdb:
        self.cleanPDB(pdb=None, chains=self.chains, **kwargs)
def parse2new(self, source, disconnect=False, skipRes=None):
    """
    Create a new PDBModel from the source.

    @param source: source PDB file or pickled PDBModel or PDBModel object
    @type source: str || file || PDBModel
    @param disconnect: do *not* associate new model with the source [False]
    @type disconnect: bool
    @param skipRes: list residues that should not be parsed
    @type skipRes: [ str ]

    @return: new model (always of type PDBModel, regardless of source type)
    @rtype: PDBModel
    """
    m = B.PDBModel()
    self.update(m, source, updateMissing=True, skipRes=skipRes)

    if disconnect:
        m.disconnect()

    return m
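# Usage sketch, mirroring test_PDBParseModel above (path taken from the
# Biskit test tree):
#
#   p = PDBParseModel()
#   m = p.parse2new(B.PDBModel(T.testRoot() + '/rec/1A2P.pdb'),
#                   disconnect=True)
#   print m.lenChains(), len(m)    # chains and atoms in the new model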
def finish(self):
    """Called after a successful intervor run. Overrides Executor hook."""
    Executor.finish(self)

    f_so = self.f_prefix + '_intervor_SO.txt'
    f_fac = self.f_prefix + '_intervor_interface-IV.txt'

    facets = B.ProfileCollection()
    facets['so'] = N.array(self.__parseSO(f_so), N.int32)

    d = self.__parseFacets(f_fac)
    for k in d.keys():
        facets[k] = d.valuesOf(k)
        assert len(facets[k]) == len(facets['so']),\
               'number of facet records and shelling order records dont match'

    self.result = facets
    self.__mapfacets2Atoms(facets)
def __parseFacets(self, fname):
    """
    -> Biskit.DictList
    """
    result = []
    r = f = None
    try:
        f = open(fname)
        lines = f.readlines()
        lines = ''.join(lines)

        records = lines.split('FBIeg')
        records = [r for r in records if len(r) > 10]

        result = B.DictList([self.__parseFacetRecord(r) for r in records])

    except IOError, why:
        raise IntervorError('Error accessing intervor output file %r: %r'
                            % (fname, why))

    return result
def loadFile(self):
    if self.extension in ('x', 'x.gz'):
        self.traj = bi.AmberCrdParser(self.fname, self.pdb, 1)
        self.traj.crd.readline()    # skip first line
        self.nextFunction = self.traj.nextFrame

    elif self.extension in ('nc', 'netcdf'):
        try:
            # Replace Scientific-python NetCDF with the reader from MDTraj;
            # Scientific-python does not work:
            #   import Scientific.IO.NetCDF as ncdf
            #   self.traj = ncdf.NetCDFFile(self.fname, 'r').variables['coordinates']
            #   self.nextFunction = self.returnFrameFromNetcdf
            from mdtraj.formats import NetCDFTrajectoryFile as ncdf
            coordinates, time, cell_lengths, cell_angles = ncdf(self.fname, mode='r').read()
            self.traj = coordinates
            self.nextFunction = self.returnFrameFromNetcdf
        except ImportError:
            raise TrajFileError, "Can't read NetCDF trajectory"

    elif self.extension in ('dcd',):
        from NamdDCDParser import NamdDCDParser
        self.traj = NamdDCDParser(self.fname, self.pdb, box=1)
        self.nextFunction = self.traj.read_dcdstep
def compile_standard_dict(fpdb, fout, fmask=None, skip_first=True,
                          take_chain=True):
    """
    @param fpdb: str, pdb file containing standard residues
    @param fout: str, file name for pickled dictionary
    @param fmask: func, method to mask standard residues
    @param skip_first: bool, remove the first residue
    @param take_chain: bool, only use the first chain
    """
    m = B.PDBModel(fpdb)
    if take_chain:
        m = m.takeChains([0])
    if skip_first:
        m.removeRes(0)
    if fmask is not None:
        m = m.compress(fmask(m))

    d = extract_unique_residues(m)
    for name in d:
        d[name] = normalize_residue(d[name])
    T.dump(d, fout)

    return d
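# Usage sketch (file names are illustrative; maskProtein() is a standard
# Biskit PDBModel mask):
#
#   d = compile_standard_dict('amino_acids.pdb', 'standard_residues.dic',
#                             fmask=lambda m: m.maskProtein())
#   print d.keys()    # one normalized reference residue per residue name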
import Biskit.tools as T
import Biskit as B
import numpy as N

com = T.load('com.traj')

# re-order frames into 4 parallel trajectories
frames = N.zeros(len(com), int)
for i in range(11):
    N.put(frames, range(i * 4, i * 4 + 4), N.arange(i, 44, 11))

etraj = B.EnsembleTraj(n_members=4)
etraj.frames = com.takeFrames(frames).frames
etraj.ref = com.ref
etraj.resetFrameNames()
etraj.ref.disconnect()

# separate protein and DNA into two chains
etraj.ref.chainIndex(breaks=True, force=True, cache=True)
etraj.ref.addChainId()

## extract only some residues for speed
t1 = etraj.takeAtoms(etraj.ref.res2atomIndices(range(10)))
t2 = etraj.takeChains([1])
etraj = t1.concatAtoms(t2)

T.dump(etraj, 'com_fake.etraj')
def __init__(self, name, args='', template=None, f_in=None, f_out=None,
             f_err=None, strict=1, catch_out=1, push_inp=1, catch_err=0,
             node=None, nice=0, cwd=None, tempdir=None, log=None, debug=0,
             verbose=None, validate=1, **kw):
    """
    Create Executor. *name* must point to an existing program configuration
    unless *strict*=0. Executor will create a program input from the
    template and its own fields and put it into f_in. If f_in but no
    template is given, the unchanged f_in is used as input. If neither is
    given, the program is called without input. If a node is given, the
    process is wrapped in a ssh call. If *nice* != 0, the process is
    preceded by nice. *cwd* specifies the working directory. By default,
    this setting is taken from the configuration file which defaults to the
    current working directory.

    @param name: program name (configured in .biskit/exe_name.dat)
    @type name: str
    @param args: command line arguments
    @type args: str
    @param template: template for input file -- this can be the template
                     itself or the path to a file containing it
                     (default: None)
    @type template: str
    @param f_in: target for completed input file (default: None, discard)
    @type f_in: str
    @param f_out: target file for program output (default: None, discard)
    @type f_out: str
    @param f_err: target file for error messages (default: None, discard)
    @type f_err: str
    @param strict: strict check of environment and configuration file
                   (default: 1)
    @type strict: 1|0
    @param catch_out: catch output in file (f_out or temporary) (default: 1)
    @type catch_out: 1|0
    @param catch_err: catch errors in file (f_err or temporary) (default: 0)
    @type catch_err: 1|0
    @param push_inp: push input file to process via stdin ('< f_in') [1]
    @type push_inp: 1|0
    @param node: host for calculation (None->no ssh) (default: None)
    @type node: str
    @param nice: nice level (default: 0)
    @type nice: int
    @param cwd: working directory, overwrites ExeConfig.cwd (default: None)
    @type cwd: str
    @param tempdir: folder for temporary files, will be created if not
                    existing (default: None ... use system default);
                    if set to True: create a new tempdir within default
    @type tempdir: str | True
    @param log: execution log (None->STDOUT) (default: None)
    @type log: Biskit.LogFile
    @param debug: keep all temporary files (default: 0)
    @type debug: 0|1
    @param verbose: print progress messages to log (default: log != STDOUT)
    @type verbose: 0|1
    @param validate: validate binary and environment immediately (1=default)
                     or only before execution (0)
    @type validate: 1|0
    @param kw: key=value pairs with values for template file or string
    @type kw: key=value

    @raise ExeConfigError: if environment is not fit for running the program
    """
    self.exe = ExeConfigCache.get(name, strict=strict)
    if validate:
        self.exe.validate()

    ## see prepare()
    self.keep_tempdir = True    ## set to False if tempdir created new
    self.tempdir = self.newtempfolder(tempdir)  ## repeat in child classes

    self.f_out = t.absfile(f_out)
    if not f_out and catch_out:
        self.f_out = tempfile.mktemp('.out', name + '_', self.tempdir)

    self.f_err = t.absfile(f_err)
    if not f_err and catch_err:
        self.f_err = tempfile.mktemp('.err', name + '_', self.tempdir)

    self.keep_out = f_out is not None
    self.keep_inp = f_in is not None  #: do not clean up the input file
    self.catch_out = catch_out        #: capture STDOUT into file
    self.catch_err = catch_err        #: capture STDERR into file

    self.f_in = t.absfile(f_in)
    ## pre-define name of input file that will be generated later
    if template and not f_in:
        self.f_in = tempfile.mktemp('.inp', name + '_', self.tempdir)

    self.push_inp = push_inp
    self.args = args
    self.template = template
    self.node = node    ## or os.uname()[1]
    self.nice = nice
    self.debug = debug
    self.cwd = cwd or self.exe.cwd

    #: Log object for own messages; undocumented: capture string as well
    self.log = log or StdLog()
    if type(self.log) is str:
        self.log = B.LogFile(self.log)

    self.verbose = verbose
    if self.verbose is None:
        self.verbose = (log is not None)

    ## these are set by self.run():
    self.runTime = 0        #: time needed for last run
    self.output = None      #: STDOUT returned by process
    self.error = None       #: STDERR returned by process
    self.returncode = None  #: int status returned by process
    self.pid = None         #: process ID

    self.result = None      #: set by self.finish()

    self.initVersion = self.version()

    self.__dict__.update(kw)
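# A minimal usage sketch (assuming a program entry 'emboss' is configured
# in ~/.biskit/exe_emboss.dat; the name and arguments are illustrative):
#
#   x = Executor('emboss', args='-h', catch_out=1, verbose=1)
#   x.run()
#   print x.output        # captured STDOUT
#   print x.returncode    # exit status of the process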
def profLength(self):
    """
    Length of profile::

      profLength() -> int; length of first non-None profile or 0

    @return: length of first non-None profile or 0
    @rtype: int
    """
    return len(self.map)


if __name__ == '__main__':

    import string

    p = B.ProfileCollection()
    p['name'] = string.letters
    p['id'] = range(len(string.letters))

    ## mirror looks at every second position
    mirror = ProfileMirror(p, range(0, len(string.letters), 2))

    assert mirror['name'] == list(string.letters[::2])
    assert N.all(mirror['id'] == range(0, p.profLength(), 2))

    ## create a new profile
    mirror['m_id'] = range(mirror.profLength())
    assert N.all(p['m_id'][::2] == mirror['m_id'])

    mirror['name'][2] = '#'    ## does not have any effect
    mirror['name'] = ['#'] * mirror.profLength()
#!/usr/bin/python
import pybel, sys, Biskit, math, re

####
# Assign pdb b-factors to a new column in a mol2 file, previously converted
# to a desired range from 0.2 to 1, applying a desired cutoff on the
# b-factors.
# python get_bfactor_to_mol2.py FILE.pdb FILE.mol2 FILE-OUT.mol2 TF_MIN BF_CUTOFF
####

pdb_in = sys.argv[1]
mol2_in = sys.argv[2]
mol2_out = sys.argv[3]
argtf_min = sys.argv[4]
bf_cutoff = sys.argv[5]    # 0 for no correction

# open input files
pdbfile = Biskit.PDBModel(pdb_in)
mol2mols = pybel.readfile("mol2", mol2_in)
mol2mol = mol2mols.next()

# get atoms from mol2 and pdb
mol2atoms = []
for atom in mol2mol:
    mol2atoms.append(atom)
pdbatoms = pdbfile.atoms

# initialize ln to 0
ln = 0

#### Correction for temperature factor, in a desired range
tf_max = 1
tf_min = float(argtf_min)
tf = pdbatoms['temperature_factor']
def prepare(self):
    self.m = B.PDBModel(T.testRoot() + '/com/1BGS.pdb')