def addConservation( self, pfamEntries=None, verbose=0, log=None):
    """
    Attach three conservation score profiles derived from pFam HMMs to the
    residues of the model: 'cons_abs', 'cons_max' and 'cons_ent'.
    See L{Biskit.Hmmer}

    The theoretically most useful one is 'cons_ent' which gives the
    relative entropy of the residue distribution with respect to the
    background distribution of amino acids (Kullback-Leibler distance)
    in swissprot. See PMID 16916457.

    @param pfamEntries: External hmmSearch result, list of
                        (non-overlapping) profile hits.
                        (default: None, do the search) Example::
                          [{'ribonuclease': [[1, 108]]},..]
                          [{profileName : [ [startPos, endPos],
                                            [start2, end2]]}]
                         - startPos, endPos as reported by hmmPfam
                           for PDB sequence generated from this model
    @type  pfamEntries: [{dict}]
    @param verbose: verbosity level (default: 0)
    @type  verbose: 1|0
    @param log: Log file for messages [STDOUT]
    @type  log: Biskit.LogFile

    @raise ExeConfigError: if external application is missing
    """
    ## solvent and other troublemakers are hidden from Hmmer
    protAtoms = self.m.maskProtein()
    protRes = self.m.atom2resMask( protAtoms )

    ## only work on a compressed copy if there is something to remove
    model = self.m if N.alltrue( protAtoms ) \
            else self.m.compress( protAtoms )

    hmmer = Hmmer( verbose=verbose, log=log )
    hmmer.checkHmmdbIndex()

    ## (profile name, scoring method, profile comment) in execution order
    scorings = (
        ( 'cons_abs', hmmer.scoreAbsSum,
          "absolute sum of all 20 hmm scores per position" ),
        ( 'cons_max', hmmer.scoreMaxAll,
          "max of 20 hmm scores (-average / SD) per position" ),
        ( 'cons_ent', hmmer.scoreEntropy,
          ( "relative entropy (Kullback-Leibler distance) between "
            "observed and background amino acid distribution "
            "(high -> high conservation/discrimination)" ) ),
    )

    ## the first scoring receives the user-supplied hits, every following
    ## one re-uses the hits returned by its predecessor
    hits = pfamEntries

    for name, score, comment in scorings:
        values, hits = score( model, hmmNames=hits )

        self.m.residues.set( name, values, hmmHits=hits, mask=protRes,
                             comment=comment,
                             version= T.dateString() + ' ' + self.version() )
def addASA(self):
    """
    Add profiles of Accessible Surface Area: 'relASA' (per atom) and
    'ASA_total', 'ASA_sc', 'ASA_bb' (per residue). See L{Biskit.WhatIf}

    @note: Using WhatIf to calculate relative accessibilities is not
           necessary any more. SurfaceRacer now also adds a profile
           'relAS' (containing relative solvent accessible surf) and
           'relMS' (relative molecular surface).

    @raise ProfileError: if WhatIf-returned atom/residue lists don't match.
                         Usually that means, WhatIf didn't recognize some
                         residue name
    """
    ## third return value (a residue mask) is not used here
    atomRelAcc, resASA, _ = WhatIf(self.m).run()

    ## residue profiles are only assigned to standard protein residues
    protRes = self.m.atom2resMask(self.m.maskProtein(standard=1))

    ## the atom profile is set for all atoms (no mask applied)
    self.m.atoms.set('relASA', atomRelAcc,
                     comment='relative accessible surface area in %',
                     version=T.dateString() + ' ' + self.version())

    ## (profile name, column of resASA, profile comment)
    columns = (
        ('ASA_total', 0, 'accessible surface area in A^2'),
        ('ASA_sc', 1, 'side chain accessible surface area in A^2'),
        ('ASA_bb', 2, 'back bone accessible surface area in A^2'),
    )

    for name, col, comment in columns:
        self.m.residues.set(name, resASA[:, col], protRes, 0,
                            comment=comment,
                            version=T.dateString() + ' ' + self.version())
def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Count, for each selected atom, the number of heavy atoms within the
    given radius (the atom itself is not counted). The result is stored
    as atom profile |profName|, set with the selection as mask and a
    default of -1.

    If |minasa| is given, only atoms selected by
    profile2mask( 'relAS', minasa ) are evaluated. A missing 'relAS'
    profile is first generated with L{addSurfaceRacer} (default
    parameters).

    @param radius: in Angstrom
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%
                   (default: None, evaluate all atoms)
    @type  minasa: float
    @param profName: name of the resulting atom profile (default: density)
    @type  profName: str

    @raise ExeConfigError: if 'relAS' has to be calculated and the
                           external application is missing
    """
    mHeavy = self.m.maskHeavy()
    xyz = N.compress( mHeavy, self.m.getXyz(), 0 )

    if minasa:
        ## profile() hands back the plain default 0 only if 'relAS' does
        ## not exist; testing the type avoids comparing a whole profile
        ## array against 0 (element-wise result, ambiguous truth value)
        if isinstance( self.m.profile( 'relAS', 0 ), int ):
            ## 'relAS' is created by addSurfaceRacer; addASA would only
            ## add 'relASA' and leave the profile needed below missing
            self.addSurfaceRacer()

        mSurf = self.m.profile2mask( 'relAS', minasa )
    else:
        mSurf = N.ones( self.m.lenAtoms() )

    ## loop over all surface atoms
    surf_pos = N.nonzero( mSurf )
    cutoff = radius**2   ## compare squared distances, saves the sqrt
    contacts = []

    for i in surf_pos:
        dist = N.sum( ( xyz - self.m.xyz[i] )**2, 1 )

        ## a heavy atom is part of xyz and hence finds itself (dist 0);
        ## hydrogens are not in xyz, so nothing must be subtracted for them
        contacts.append( N.sum( N.less( dist, cutoff ) ) - int( mHeavy[i] ) )

    self.m.atoms.set( profName, contacts, mSurf, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def addSecondaryStructure(self):
    """
    Run the DSSP program on the model and store its results as residue
    profiles: 'secondary' (secondary structure code, default '.'),
    'dssp_acc', 'dssp_phi' and 'dssp_psi' (angles default to 360.0).

    Profile code::
      B = residue in isolated beta-bridge
      E = extended strand, participates in beta ladder
      G = 3-helix (3/10 helix)
      I = 5 helix (pi helix)
      T = hydrogen bonded turn
      S = bend
      . = loop or irregular

    @raise ExeConfigError: if external application is missing
    """
    result = Dssp(self.m).run()

    ## only the secondary structure profile carries a version stamp
    self.m.residues.set('secondary', result['dssp'],
                        comment='secondary structure from DSSP',
                        version=T.dateString() + ' ' + self.version(),
                        default='.')

    ## (profile name == key in the DSSP result, comment, extra set() options)
    extras = (
        ('dssp_acc', 'accessible surface area from DSSP', {}),
        ('dssp_phi', 'PHI angle from DSSP', {'default': 360.0}),
        ('dssp_psi', 'PSI angle from DSSP', {'default': 360.0}),
    )

    for name, comment, options in extras:
        self.m.residues.set(name, result[name], comment=comment, **options)
def addSecondaryStructure(self):
    """
    Add the residue profile 'secondary' with the secondary structure as
    calculated by the DSSP program. DSSP is run on the protein atoms
    only; the result is assigned through the matching residue mask.

    Profile code::
      B = residue in isolated beta-bridge
      E = extended strand, participates in beta ladder
      G = 3-helix (3/10 helix)
      I = 5 helix (pi helix)
      T = hydrogen bonded turn
      S = bend
      . = loop or irregular

    @raise ExeConfigError: if external application is missing
    """
    protein_atoms = self.m.maskProtein()

    ## DSSP only gets to see the protein part of the model
    ss_codes = Dssp(self.m.compress(protein_atoms)).run()

    ## map the protein-only result back onto the residues of the full model
    protein_res = self.m.atom2resMask(protein_atoms)

    self.m.residues.set('secondary', ss_codes, mask=protein_res,
                        comment='secondary structure from DSSP',
                        version=T.dateString() + ' ' + self.version())
def __init__(self, lst=None ):
    """
    Create the model registry, record the creation version and, if
    given, add an initial list of complexes via self.extend.

    @param lst: list of Complexes (default: None, start empty)
    @type  lst: [Complex]
    """
    ## non-redundant rec/lig_models of all complexes indexed by file name
    self.models = ComplexModelRegistry()

    self.initVersion = t.dateString() + ' ' + self.version()

    ## None replaces the former mutable default [] (one list object shared
    ## by all calls); the comparison against [] is kept so that explicitly
    ## passed arguments are treated exactly as before
    if lst is not None and lst != []:
        self.extend( lst )
def addASA( self ):
    """
    Add profiles of Accessible Surface Area: 'relASA' (per atom) and
    'ASA_total', 'ASA_sc', 'ASA_bb' (per residue). See L{Biskit.WhatIf}

    @note: Using WhatIf to calculate relative accessibilities is not
           necessary any more. SurfaceRacer now also adds a profile
           'relAS' (containing relative solvent accessible surf) and
           'relMS' (relative molecular surface).

    @raise ProfileError: if WhatIf-returned atom/residue lists don't match.
                         Usually that means, WhatIf didn't recognize some
                         residue name
    """
    whatif = WhatIf( self.m )
    ## the third return value (a residue mask) is not needed
    atomRelAcc, resASA, _ = whatif.run()

    stdProtein = self.m.maskProtein( standard=1 )
    stdResidues = self.m.atom2resMask( stdProtein )

    ## atom profile: assigned to all atoms, no mask
    self.m.atoms.set( 'relASA', atomRelAcc,
                      comment='relative accessible surface area in %',
                      version= T.dateString() + ' ' + self.version() )

    ## residue profiles: column i of resASA goes into profile names[i],
    ## restricted to standard protein residues (0 elsewhere)
    names = ( 'ASA_total', 'ASA_sc', 'ASA_bb' )
    comments = ( 'accessible surface area in A^2',
                 'side chain accessible surface area in A^2',
                 'back bone accessible surface area in A^2' )

    for col, (name, comment) in enumerate( zip( names, comments ) ):
        self.m.residues.set( name, resASA[:,col], stdResidues, 0,
                             comment=comment,
                             version= T.dateString() + ' ' + self.version() )
def addSurfaceMask( self, pname='relAS', cutoff=40 ):
    """
    Adds a residue mask profile 'surfMask' marking residues that have
    any atom with > |cutoff| % exposure (by default 40%) compared to a
    random coil state.

    @param pname: name of relative profile to use
                  (Whatif-relASA OR SurfaceRacer - relAS)
                  (default: relAS)
    @type  pname: str
    @param cutoff: minimal relative exposure in % handed to profile2mask
                   as cutoff_min (default: 40)
    @type  cutoff: float
    """
    exposedAtoms = self.m.profile2mask( pname, cutoff_min=cutoff )

    ## %g renders the default 40 as '40', i.e. the comment is unchanged
    ## for the default cutoff
    self.m.residues.set( 'surfMask', self.m.atom2resMask( exposedAtoms ),
                         comment='residues with any atom > %g%% exposed' \
                                 % cutoff,
                         version= T.dateString() + ' ' + self.version() )
def addSurfaceMask(self, pname='relAS', cutoff=40):
    """
    Adds a residue mask profile 'surfMask' marking residues that have
    any atom with > |cutoff| % exposure (by default 40%) compared to a
    random coil state.

    @param pname: name of relative profile to use
                  (Whatif-relASA OR SurfaceRacer - relAS)
                  (default: relAS)
    @type  pname: str
    @param cutoff: minimal relative exposure in % handed to profile2mask
                   as cutoff_min (default: 40)
    @type  cutoff: float
    """
    exposed_atoms = self.m.profile2mask(pname, cutoff_min=cutoff)

    ## %g renders the default 40 as '40', i.e. the comment is unchanged
    ## for the default cutoff
    self.m.residues.set('surfMask', self.m.atom2resMask(exposed_atoms),
                        comment='residues with any atom > %g%% exposed'
                                % cutoff,
                        version=T.dateString() + ' ' + self.version())
def addSecondaryStructure( self ):
    """
    Add the residue profile 'secondary' holding the secondary structure
    of the model as calculated by the DSSP program.

    Profile code::
      B = residue in isolated beta-bridge
      E = extended strand, participates in beta ladder
      G = 3-helix (3/10 helix)
      I = 5 helix (pi helix)
      T = hydrogen bonded turn
      S = bend
      . = loop or irregular

    @raise ExeConfigError: if external application is missing
    """
    ## DSSP is run on the complete model
    structure = Dssp( self.m ).run()

    stamp = T.dateString() + ' ' + self.version()

    self.m.residues.set( 'secondary', structure,
                         comment='secondary structure from DSSP',
                         version=stamp )
def addDensity(self, radius=6, minasa=None, profName='density'):
    """
    Count, for each selected atom, the number of heavy atoms within the
    given radius (the atom itself is not counted). The result is stored
    as atom profile |profName|, set with the selection as mask and a
    default of -1.

    If |minasa| is given, only atoms selected by
    profile2mask('relAS', minasa) are evaluated. A missing 'relAS'
    profile is first generated with L{addSurfaceRacer} (default
    parameters).

    @param radius: in Angstrom
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%
                   (default: None, evaluate all atoms)
    @type  minasa: float
    @param profName: name of the resulting atom profile (default: density)
    @type  profName: str

    @raise ExeConfigError: if 'relAS' has to be calculated and the
                           external application is missing
    """
    mHeavy = self.m.maskHeavy()
    xyz = N0.compress(mHeavy, self.m.getXyz(), 0)

    if minasa:
        ## profile() hands back the plain default 0 only if 'relAS' does
        ## not exist; testing the type avoids comparing a whole profile
        ## array against 0 (element-wise result, ambiguous truth value)
        if isinstance(self.m.profile('relAS', 0), int):
            ## 'relAS' is created by addSurfaceRacer; addASA would only
            ## add 'relASA' and leave the profile needed below missing
            self.addSurfaceRacer()

        mSurf = self.m.profile2mask('relAS', minasa)
    else:
        mSurf = N0.ones(self.m.lenAtoms())

    ## loop over all surface atoms
    surf_pos = N0.nonzero(mSurf)
    cutoff = radius**2   ## compare squared distances, saves the sqrt
    contacts = []

    for i in surf_pos:
        dist = N0.sum((xyz - self.m.xyz[i])**2, 1)

        ## a heavy atom is part of xyz and hence finds itself (dist 0);
        ## hydrogens are not in xyz, so nothing must be subtracted for them
        contacts.append(N0.sum(N0.less(dist, cutoff)) - int(mHeavy[i]))

    self.m.atoms.set(profName, contacts, mSurf, default=-1,
                     comment='atom density radius %3.1fA' % radius,
                     version=T.dateString() + ' ' + self.version())
def __init__(self):
    """
    Record the creation date and the class version in self.initVersion.
    Override but call.
    """
    created = t.dateString()
    self.initVersion = created + ' ' + self.version()
def addSurfaceRacer( self, probe=1.4, vdw_set=1, probe_suffix=0, mask=None ):
    """
    Always adds three different atom profiles as calculated by
    SurfaceRacer::
       curvature - average curvature (or curvature_1.4 if probe_suffix=1)
       MS - molecular surface area   (or MS_1.4 if probe_suffix=1)
       AS - accessible surface area  (or AS_1.4 if probe_suffix=1)

    If the probe radius is 1.4 Angstrom and the Richards vdw radii
    set is used (and SurfaceRacer reports them) the following two
    profiles are also added::
       relAS - Relative solvent accessible surface
       relMS - Relative molecular surface

    See L{Biskit.SurfaceRacer}

    @param probe: probe radius
    @type  probe: float
    @param vdw_set: defines what vdw-set to use (1-Richards, 2-Chothia)
    @type  vdw_set: 1|2
    @param probe_suffix: append probe radius to profile names
    @type  probe_suffix: 1|0
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @raise ExeConfigError: if external application is missing
    """
    ## empty unless probe_suffix is set (string repeated 0 or 1 times)
    suffix = probe_suffix * ('_%3.1f' % probe)

    ## hydrogens + waters are not allowed during FastSurf calculation
    if mask is None:
        mask = self.m.maskHeavy() * N.logical_not( self.m.maskSolvent() )

    racer = SurfaceRacer( self.m, probe, vdw_set=vdw_set, mask=mask )
    result = racer.run()
    info = result['surfaceRacerInfo']

    ## (profile name, key in SurfaceRacer result, profile comment)
    standard = (
        ( 'MS' + suffix, 'MS', 'Molecular Surface area in A' ),
        ( 'AS' + suffix, 'AS', 'Accessible Surface area in A' ),
        ( 'curvature' + suffix, 'curvature', 'Average curvature' ),
    )

    for name, key, comment in standard:
        self.m.atoms.set( name, result[key], mask, 0,
                          comment=comment,
                          version= T.dateString() + ' ' + self.version(),
                          **info )

    ## relative values never get a probe suffix
    if round(probe, 1) == 1.4 and vdw_set == 1 and 'relAS' in result:

        relative = (
            ( 'relAS', 'Relative solvent accessible surf.' ),
            ( 'relMS', 'Relative molecular surf.' ),
        )

        for key, comment in relative:
            self.m.atoms.set( key, result[key], mask, 0,
                              comment=comment,
                              version= T.dateString()+' ' +self.version(),
                              **info )
def addSurfaceRacer(self, probe=1.4, vdw_set=1, probe_suffix=0, mask=None):
    """
    Always adds three different atom profiles as calculated by
    SurfaceRacer::
       curvature - average curvature (or curvature_1.4 if probe_suffix=1)
       MS - molecular surface area   (or MS_1.4 if probe_suffix=1)
       AS - accessible surface area  (or AS_1.4 if probe_suffix=1)

    If the probe radius is 1.4 Angstrom and the Richards vdw radii
    set is used (and SurfaceRacer reports them) the following two
    profiles are also added::
       relAS - Relative solvent accessible surface
       relMS - Relative molecular surface

    See L{Biskit.SurfaceRacer}

    @param probe: probe radius
    @type  probe: float
    @param vdw_set: defines what vdw-set to use (1-Richards, 2-Chothia)
    @type  vdw_set: 1|2
    @param probe_suffix: append probe radius to profile names
    @type  probe_suffix: 1|0
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @raise ExeConfigError: if external application is missing
    """
    ## empty unless probe_suffix is set (string repeated 0 or 1 times)
    tag = probe_suffix * ('_%3.1f' % probe)

    ## hydrogens + waters are not allowed during FastSurf calculation
    if mask is None:
        mask = self.m.maskHeavy() * N0.logical_not(self.m.maskSolvent())

    surfaces = SurfaceRacer(self.m, probe, vdw_set=vdw_set, mask=mask).run()
    run_info = surfaces['surfaceRacerInfo']

    def store(profile, key, comment):
        ## copy one SurfaceRacer result into an atom profile (0 outside mask)
        self.m.atoms.set(profile, surfaces[key], mask, 0,
                         comment=comment,
                         version=T.dateString() + ' ' + self.version(),
                         **run_info)

    store('MS' + tag, 'MS', 'Molecular Surface area in A')
    store('AS' + tag, 'AS', 'Accessible Surface area in A')
    store('curvature' + tag, 'curvature', 'Average curvature')

    ## relative surfaces are only stored for the standard 1.4 A probe with
    ## Richards radii and only if SurfaceRacer returned them
    if round(probe, 1) != 1.4 or vdw_set != 1 or 'relAS' not in surfaces:
        return

    store('relAS', 'relAS', 'Relative solvent accessible surf.')
    store('relMS', 'relMS', 'Relative molecular surf.')
def addConservation(self, pfamEntries=None, verbose=0, log=None):
    """
    Attach three conservation score profiles derived from pFam HMMs to the
    residues of the model: 'cons_abs', 'cons_max' and 'cons_ent'.
    See L{Biskit.Hmmer}

    The theoretically most useful one is 'cons_ent' which gives the
    relative entropy of the residue distribution with respect to the
    background distribution of amino acids (Kullback-Leibler distance)
    in swissprot. See PMID 16916457.

    @param pfamEntries: External hmmSearch result, list of
                        (non-overlapping) profile hits.
                        (default: None, do the search) Example::
                          [{'ribonuclease': [[1, 108]]},..]
                          [{profileName : [ [startPos, endPos],
                                            [start2, end2]]}]
                         - startPos, endPos as reported by hmmPfam
                           for PDB sequence generated from this model
    @type  pfamEntries: [{dict}]
    @param verbose: verbosity level (default: 0)
    @type  verbose: 1|0
    @param log: Log file for messages [STDOUT]
    @type  log: Biskit.LogFile

    @raise ExeConfigError: if external application is missing
    """
    ## mask out solvent and other troublemakers
    atom_mask = self.m.maskProtein()
    res_mask = self.m.atom2resMask(atom_mask)

    ## a compressed copy is only needed if non-protein atoms are present
    if N0.alltrue(atom_mask):
        target = self.m
    else:
        target = self.m.compress(atom_mask)

    hmmer = Hmmer(verbose=verbose, log=log)
    hmmer.checkHmmdbIndex()

    def store(name, scorer, hits, comment):
        ## score target with one Hmmer method, save the residue profile
        ## and hand the hits back for the next scoring round
        profile, hits = scorer(target, hmmNames=hits)
        self.m.residues.set(name, profile, hmmHits=hits, mask=res_mask,
                            comment=comment,
                            version=T.dateString() + ' ' + self.version())
        return hits

    hits = store('cons_abs', hmmer.scoreAbsSum, pfamEntries,
                 "absolute sum of all 20 hmm scores per position")

    hits = store('cons_max', hmmer.scoreMaxAll, hits,
                 "max of 20 hmm scores (-average / SD) per position")

    store('cons_ent', hmmer.scoreEntropy, hits,
          "relative entropy (Kullback-Leibler distance) between "
          "observed and background amino acid distribution "
          "(high -> high conservation/discrimination)")