def takeFrames( self, indices ):
    """
    Create a new trajectory that holds only the frames at the given
    positions.

    @param indices: frame positions to keep; positions that are not smaller
                    than the number of frames are silently dropped
    @type  indices: [int]

    @return: new object of the same class (fewer frames, semi-deep copy
             of the reference model)
    @rtype: Trajectory
    """
    ## discard positions beyond the last frame
    in_bounds = N0.less( indices, len( self.frames ) )
    indices = N0.compress( in_bounds, indices )

    result = self.__class__()

    ## this step takes some time for large frames !
    result.frames = N0.take( self.frames, indices, 0 )

    ## semi-deep copy of reference model: take all of its atoms
    all_atoms = range( self.ref.lenAtoms() )
    result.setRef( self.ref.take( all_atoms ) )

    if self.frameNames is not None:
        names = N0.take( self.frameNames, indices, 0 ).tolist()
        result.frameNames = [ ''.join( name ) for name in names ]

    result.pc = self.__takePca( indices )
    result.profiles = self.profiles.take( indices )

    ## the residue index is shared with the source trajectory, not copied
    result.resIndex = self.resIndex

    return result
def take(self, rec_pos, lig_pos):
    """
    Get copy of this complex with given atoms of rec and lig.

    @param rec_pos: receptor indices to take
    @type  rec_pos: [int]
    @param lig_pos: ligand indices to take
    @type  lig_pos: [int]

    @return: new complex (info is deep-copied, ligandMatrix is
             shallow-copied)
    @rtype: Complex
    """
    r = self.__class__()
    r.lig_model = self.lig_model.take(lig_pos)
    r.rec_model = self.rec_model.take(rec_pos)
    r.info = deepcopy(self.info)

    ## explicit None test -- the truth value of an array is ambiguous
    if self.pw_dist is not None:
        ## receptor positions select along axis 1, ligand positions along
        ## axis 0 (axis given explicitly rather than relying on a default)
        r.pw_dist = N0.take(self.pw_dist, rec_pos, 1)
        r.pw_dist = N0.take(r.pw_dist, lig_pos, 0)

    r.ligandMatrix = copy(self.ligandMatrix)

    ## todo: take cached contacts as well
    return r
def rmsMatrixByMember(self, mirror=0, step=1):
    """
    Get result matrix ordered first by member then by time. (requires
    EnsembleTraj)

    @param mirror: mirror matrix at diagonal (only for intra-traj. rms)
                   (default: 0)
    @type  mirror: 0|1
    @param step: take only every step frame [1]
    @type  step: int

    @return: re-ordered copy of the result matrix; for a single
             trajectory and mirror=0 the diagonal and everything below
             it is set to 0
    @rtype: array
    """
    single_traj = self.traj_2 is None

    ## the full (mirrored) matrix is needed for re-ordering
    matrix = self.getResult(mirror=single_traj)

    rows = self.traj_1.argsortMember(step=step)
    cols = rows
    if self.traj_2 is not None:
        cols = self.traj_2.argsortMember(step=step)

    ordered = N0.take(N0.take(matrix, rows, 0), cols, 1)

    if single_traj and not mirror:
        ## blank the diagonal and the triangle below it
        for col in range(N0.shape(ordered)[0]):
            for row in range(col, N0.shape(ordered)[1]):
                ordered[row, col] = 0.

    return ordered
def rmsMatrixByMember( self, mirror=0, step=1 ):
    """
    Return the rms result matrix with rows and columns sorted by ensemble
    member first and by time second. (requires EnsembleTraj)

    @param mirror: mirror matrix at diagonal (only for intra-traj. rms)
                   (default: 0)
    @type  mirror: 0|1
    @param step: take only every step frame [1]
    @type  step: int

    @return: sorted copy of the matrix; without a second trajectory and
             with mirror=0 all cells on and below the diagonal are 0
    @rtype: array
    """
    intra = self.traj_2 is None

    full = self.getResult( mirror=intra )

    order_1 = self.traj_1.argsortMember( step=step )
    if self.traj_2 is not None:
        order_2 = self.traj_2.argsortMember( step=step )
    else:
        order_2 = order_1

    sorted_rows = N0.take( full, order_1, 0 )
    result = N0.take( sorted_rows, order_2, 1 )

    if not intra or mirror:
        return result

    ## single trajectory, no mirroring: clear diagonal and lower triangle
    for c in range( N0.shape( result )[0] ):
        for r in range( c, N0.shape( result )[1] ):
            result[r, c] = 0.

    return result
def __thinarray(self, a, step):
    """
    Keep only every step'th row and every step'th column of an array.

    @param a: input array (needs at least one row)
    @type  a: N0.array
    @param step: stepping in both dimensions
    @type  step: int

    @return: smaller array
    @rtype: N0.array
    """
    row_pos = range(0, len(a), step)
    rows = N0.take(a, row_pos, axis=0)

    ## column count is read from the first remaining row
    col_pos = range(0, len(rows[0]), step)
    return N0.take(rows, col_pos, axis=1)
def takeAtoms( self, indices, returnClass=None ):
    """
    Take atoms from frames::
      takeAtoms( indices, returnClass=None ) -> copy of Trajectory

    @param indices: list of atom indices
    @type  indices: [int]
    @param returnClass: class of the returned object
                        (default: None, same class as this object)
    @type  returnClass: class OR None

    @return: copy of this Trajectory (with fewer atoms); the PCA data
             ('pc') is shared with the original, the residue index is
             reset to None
    @rtype: Trajectory
    """
    if not returnClass:
        returnClass = self.__class__
    result = returnClass()

    ## copy over everything, so that child classes can preserve own fields
    result.__dict__.update( self.__dict__ )

    ## drop the fields that are rebuilt below
    result.frames = None
    result.ref = None
    result.frameNames = None
    result.profiles = None

    ## atoms are the second axis of the frame array
    result.frames = N0.take( self.frames, indices, 1 )
    result.setRef( self.ref.take( indices ) )

    result.frameNames = copy.copy( self.frameNames )
    result.resIndex = None
    result.profiles = self.profiles.clone()

    result.pc = self.pc    ## this is not really clean

    return result
def getResFluct( self, atomFluctList=None ):
    """
    Convert list of atomic fluctuations to list of residue fluctuation.

    @param atomFluctList: array 1 x N_atoms of float
                          (default: None, result of getFluct_global())
    @type  atomFluctList: [float]

    @return: array 1 x N_residues of float
    @rtype: [float]

    @raise TrajError: if result length != N_residues
    """
    if atomFluctList is None:
        atomFluctList = self.getFluct_global()

    ## Give all atoms of each res. the same fluct. value
    ## (the highest fluctuation of any backbone atom)
    result = self.residusMaximus( atomFluctList, self.ref.maskBB() )

    ## take first atoms only
    result = N0.take( result, self.ref.resIndex() )

    ## check dimension ('!=' is valid in Python 2 and 3, '<>' only in 2)
    n_residues = self.ref.lenResidues()
    if len( result ) != n_residues:
        raise TrajError(
            "getResFluct(): Length of result list (%i) <>" % len(result) +
            " number of residues (%i)." % n_residues )

    return result
def __translateChainIndices(self, atomIndices, newChainMap):
    """
    Work out how the current chain indices map onto the chain indices of a
    PDBModel that contains only the given atoms in the given order.

    @param atomIndices: indices of atoms
    @type  atomIndices: [int]
    @param newChainMap: chain map [0000011133333..], one entry per
                        entry of atomIndices
    @type  newChainMap: [int]

    @return: { int:int, .. } map current chain indices to new ones
    @rtype: {int:int}

    @raise ComplexTrajError: if (parts of) chains are inserted into each
                             other, i.e. one current chain would end up
                             in two different new chains
    """
    ## todo: looks not very elegant

    current = N0.take(self.ref.chainMap(), atomIndices)

    translation = {}
    for pos, old in enumerate(current):
        new = newChainMap[pos]

        if old not in translation:
            translation[old] = new
        elif translation[old] != new:
            raise ComplexTrajError(
                "Can't insert different chains into each other.")

    return translation
def group(self, a_indices, maxPerCenter):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items. Items are spread as evenly as
    possible; surplus items go to the first groups.

    @param a_indices: atom indices
    @type  a_indices: [int]
    @param maxPerCenter: max entries per group
    @type  maxPerCenter: int

    @return: list of arrays of int, empty list for empty input
    @rtype: [[int],[int]..]
    """
    n_items = len(a_indices)
    if n_items == 0:
        return []

    ## how many groups are necessary? (integer ceiling of the ratio)
    n_centers, rest = divmod(n_items, maxPerCenter)
    if rest:
        n_centers += 1

    ## how many items/atoms go into each group?
    base, surplus = divmod(n_items, n_centers)
    sizes = [base + 1] * surplus + [base] * (n_centers - surplus)

    ## distribute atom indices into groups
    result = []
    pos = 0
    for n in sizes:
        result.append(N0.take(a_indices, N0.arange(n) + pos))
        pos += n

    return result
def shuffledLists( self, n, lst, mask=None ):
    """
    shuffle order of a list n times, leaving masked(0) elements untouched

    @param n: number of times to shuffle the list
    @type  n: int
    @param lst: list to shuffle
    @type  lst: [any]
    @param mask: mask to be applied to lst; only positions with a
                 non-zero mask value are shuffled
                 (default: None, shuffle all positions)
    @type  mask: [1|0]

    @return: list of shuffled lists (arrays if lst is a list or array)
    @rtype: [[any]]
    """
    ## explicit test -- the truth value of an array mask is ambiguous
    if mask is None or len( mask ) == 0:
        mask = N0.ones( len(lst) )

    if isinstance( lst, list ):
        lst = N0.array( lst )

    ## positions that take part in the shuffling
    pos = N0.nonzero( mask )

    ## draw all n permutations first, then build the shuffled copies
    rand_pos = N0.array( [ self.__shuffleList( pos ) for i in range(n) ] )

    result = []
    for p in rand_pos:
        r = copy.copy( lst )
        ## move the unmasked values to their permuted positions
        N0.put( r, p, N0.take( lst, pos ) )
        result.append( r )

    return result
def reduceToModel(self, xyz=None, reduce_profiles=1):
    """
    Create a reduced PDBModel from coordinates. Atom profiles of the
    source PDBModel are reduced by averaging over the grouped atoms.

    @param xyz: coordinate array (N_atoms x 3) or
                None (->use reference coordinates)
    @type  xyz: array OR None
    @param reduce_profiles: also reduce the atom profiles of the source
                            model and take over its residue profiles
                            (default: 1)
    @type  reduce_profiles: 1|0

    @return: PDBModel with reduced atom set and profile 'mass'
    @rtype: PDBModel
    """
    mass = self.m.atoms.get('mass')

    if xyz is None:
        xyz = self.m.getXyz()

    ## total mass of every atom group
    mProf = []
    for group in self.groups:
        mProf.append(N0.sum(N0.take(mass, group)))

    reduced_xyz = self.reduceXyz(xyz)

    result = PDBModel()

    ## transfer the prepared atom profiles one by one
    for key in self.atoms.keys():
        result.atoms.set(key, self.atoms.valuesOf(key))

    result.setXyz(reduced_xyz)
    result.atoms.set('mass', mProf)

    if reduce_profiles:
        self.reduceAtomProfiles(self.m, result)

        result.residues = self.m.residues

    return result
def valuesOf(self, infoKey, default=None, indices=None, unique=0 ):
    """
    Get all values of a certain info record of all or some Complexes.

    @param infoKey: key for info dict
    @type  infoKey: str
    @param default: default value if infoKey is not found (None)
    @type  default: any
    @param indices: list of int OR None(=all), indices of Complexes (None)
    @type  indices: [int] OR None
    @param unique: report each value only once (set union), (default 0)
    @type  unique: 1|0

    @return: list of values; with unique=1 in order of first occurrence
    @rtype: [any]
    """
    l = self

    if indices is not None:
        l = N0.take( N0.array(l,'O'), indices )

    values = [ c.info.get(infoKey, default) for c in l ]

    if not unique:
        return values

    ## report the same value that was tested, i.e. including the default
    r = []
    for v in values:
        if v not in r:
            r.append( v )

    return r
def convertChainIdsNter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.
    The atom at each chain's position in model.chainIndex() (presumably
    the first atom of the chain) is mapped back to a chain index with
    breaks=1.

    @param model: model the chain indices refer to
    @type  model: PDBModel
    @param chains: chain indices (not counting chain breaks)
    @type  chains: [int]

    @return: chain indices counting chain breaks; empty input is
             returned unchanged
    @rtype: [int]
    """
    if not len(chains):
        return chains

    first_atoms = N0.take(model.chainIndex(), chains)

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(first_atoms, breaks=1)
def __find_intervals(self, l):
    """
    Split a collection of integers into runs of consecutive values.

    @param l: integer values in any order
    @type  l: [int]

    @return: list of tuples, each holding one run of sorted values in
             which neighbours differ by at most 1; a new run starts
             wherever the sorted values jump by more than 1
    @rtype: [(int,..)]
    """
    l = N0.array(l)
    l = N0.take(l, N0.argsort(l))

    ## position i is a break point if l[i+1] - l[i] > 1
    gaps = N0.greater(l[1:] - l[:-1], 1)
    break_points = [i for i, is_gap in enumerate(gaps) if is_gap]

    intervals = []
    start = 0
    for index in break_points:
        intervals.append(tuple(l[start:index + 1]))
        start = index + 1

    ## remainder after the last break point
    intervals.append(tuple(l[start:]))

    return intervals
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks,
    based on the last atom of each given chain.

    @param model: model the chain indices refer to
    @type  model: PDBModel
    @param chains: chain indices (not counting chain breaks)
    @type  chains: [int]

    @return: chain indices counting chain breaks; empty input is
             returned unchanged
    @rtype: [int]
    """
    if not len(chains):
        return chains

    ## chain positions followed by one closing entry: the model length
    starts = N0.concatenate((model.chainIndex(), [len(model)]))

    ## fetch last atom of given chains: one before the next chain's entry
    next_chain = N0.array(chains) + 1
    last_atoms = N0.take(starts, next_chain) - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(last_atoms, breaks=1)
def __shuffleList(self, lst ):
    """
    Return the items of lst in random order.

    @param lst: list to shuffle
    @type  lst: [any]

    @return: shuffled copy (result of N0.take with a random permutation
             of all positions)
    @rtype: [any]
    """
    return N0.take( lst, R.permutation( len( lst ) ) )
def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set: every atom
    group is replaced by its mass-weighted center. The set must have the
    same length and order as the reference model. It may have an
    additional (time) dimension as first axis.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')

    centers = []
    for atom_indices in self.groups:
        group_xyz = N0.take(xyz, atom_indices, axis)
        group_mass = N0.take(masses, atom_indices)

        ## mass-weighted mean position of the group
        weights = N0.transpose([group_mass, ])
        center = N0.sum(group_xyz * weights, axis=axis) / N0.sum(group_mass)

        ## restore the atom axis that was summed away
        if axis == 0:
            center = center[N0.NewAxis, :]
        elif axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append(center)

    if not centers:
        return None

    return N0.concatenate(centers, axis)
def __takePca( self, indices ):
    """
    Extract PCA results for certain frames.

    @param indices: frame indices
    @type  indices: [int]

    @return: deep copy of self.pc with entries 'p', 'u' and (if not None)
             'fMask' reduced to the given positions along the first
             axis; None if there is no 'pc' attribute or it is None
    @rtype: dict OR None
    """
    pca = copy.deepcopy( getattr( self, 'pc', None ) )

    if pca is None:
        return None

    for key in ( 'p', 'u' ):
        pca[key] = N0.take( pca[key], indices, 0 )

    if pca['fMask'] is not None:
        pca['fMask'] = N0.take( pca['fMask'], indices, 0 )

    return pca
def __resWindow( self, res, n_neighbores, rchainMap=None,
                 left_allowed=None, right_allowed=None ):
    """
    Get indices of all atoms of a residue and some atoms of its
    neighboring residues (if they belong to the same chain).

    @param res: residue index
    @type  res: int
    @param n_neighbores: number of residues to include right and left
    @type  n_neighbores: int
    @param rchainMap: array 1 x N_residues of int, chain id of each res
                      (default: None, calculated from the reference)
    @type  rchainMap: array
    @param left_allowed: atom indices allowed from left neighbors
                         (default: None, backbone atoms of the reference)
    @type  left_allowed: array
    @param right_allowed: atom indices allowed from right neighbors
                          (default: None, backbone atoms of the reference)
    @type  right_allowed: array

    @return: atoms of res, atoms of neighbores
    @rtype: [ int ], [ int ]
    """
    ## some defaults.. time-consuming..
    ## chain and residue index are taken from the reference model
    if rchainMap is None:
        rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    if left_allowed  is None: left_allowed = N0.nonzero( self.ref.maskBB() )
    if right_allowed is None: right_allowed= N0.nonzero( self.ref.maskBB() )

    ## atom indices of center residue
    result = self.ref.res2atomIndices( [ res ] ).tolist()

    ## get indices of neighbore residues that still belong to same chain
    l = self.ref.lenResidues()
    chain = rchainMap[res]

    outer_left = range( res-n_neighbores, res )
    outer_right= range( res+1, res+n_neighbores+1 )

    ## residue 0 is a valid left neighbour, only negative indices are not
    outer_left = [ i for i in outer_left  if i >= 0 and rchainMap[i]==chain]
    outer_right= [ i for i in outer_right if i < l and rchainMap[i]==chain]

    ## convert to atom indices, filter them against allowed neighbore atoms
    if outer_left:
        outer_left = self.ref.res2atomIndices( outer_left )
        outer_left = MU.intersection( left_allowed,  outer_left )

    if outer_right:
        outer_right= self.ref.res2atomIndices( outer_right)
        outer_right= MU.intersection( right_allowed, outer_right)

    return result, outer_left + outer_right
def orderCenters(self, points, origin=None):
    """
    Order random points by increasing distance to first or to origin.

    NOTE: the re-ordered points themselves are returned (N0.take of
    points with the sorting indices), not the index list.

    @param points: n x 3 array of float, random center coordinates
    @type  points: array
    @param origin: 3 array of float (default: None, first point)
    @type  origin: array

    @return: points ordered by increasing distance
    @rtype: array
    """
    if origin is None:
        origin = points[0]

    dist = self.__distances(origin, points)
    order = N0.argsort(dist)

    return N0.take(points, order)
def orderCenters( self, points, origin=None ):
    """
    Sort random points by their distance to a reference position, closest
    first.

    NOTE: the sorted points are returned (N0.take of points with the
    sorting indices), not the indices themselves.

    @param points: n x 3 array of float, random center coordinates
    @type  points: array
    @param origin: 3 array of float, reference position
                   (default: None, the first point is used)
    @type  origin: array

    @return: points ordered by increasing distance
    @rtype: array
    """
    reference = origin
    if reference is None:
        reference = points[0]

    distances = self.__distances( reference, points )

    return N0.take( points, N0.argsort( distances ) )
def memberIndices(self, member, step=1):
    """
    List of frame indices for this member::
      memberIndices( int_member, [int_step] )

    Frames of the members are interleaved: member i owns the frames
    i, i + n_members, i + 2*n_members, ..

    @param member: member trajectory
    @type  member: int
    @param step: return only every i'th frame (default: 1)
    @type  step: int

    @return: indices for members
    @rtype: [int]
    """
    frames = range(member, self.lenFrames(), self.n_members)

    if step == 1:
        return frames

    ## keep every step'th entry of the member's frame list
    picked = range(0, len(frames), step)
    return N0.take(frames, picked).tolist()
def random_contacts( self, contMat, n, maskRec=None, maskLig=None ):
    """
    Create randomized surface contact matrix with same number of
    contacts and same shape as given contact matrix.

    @param contMat: template contact matrix
    @type  contMat: matrix
    @param n: number of matrices to generate
    @type  n: int
    @param maskRec: surface masks (or something similar) for the first
                    axis of contMat (default: None, all positions allowed)
    @type  maskRec: [1|0]
    @param maskLig: surface masks (or something similar) for the second
                    axis of contMat (default: None, all positions allowed)
    @type  maskLig: [1|0]

    @return: list of [n] random contact matrices
    @rtype: [matrix]
    """
    a,b = N0.shape( contMat )
    nContacts = N0.sum( N0.sum( contMat ))

    ## each missing mask defaults to "everything allowed" on its own;
    ## explicit None tests because the truth value of an array is ambiguous
    if maskRec is None:
        maskRec = N0.ones( a )
    if maskLig is None:
        maskLig = N0.ones( b )

    ## flat positions of all cells allowed by both masks
    c_mask = N0.ravel( N0.outerproduct( maskRec, maskLig ) )
    c_pos = N0.nonzero( c_mask )

    ## number of cells among which the contacts are distributed
    length = len( c_pos )

    result = []

    for i in range( n ):
        # create random array
        ranCont = mathUtils.randomMask( nContacts,length )

        # blow up to size of original matrix
        r = N0.zeros(a*b)
        N0.put( r, c_pos, ranCont)

        result += [ N0.reshape( r, (a,b) ) ]

    return result
def calcProfiles(self, m):
    """
    Calculate needed profiles. Profiles that are missing from the model
    are added through PDBDope; an atom profile is then converted into one
    average value per residue.

    @param m: PDBModel to calculate data for
    @type  m: PDBModel

    @return: profile self.profileName, one value per residue if it is an
             atom profile, otherwise as returned by m.profile()
    @rtype: [float]
    """
    import sys

    name = self.profileName

    ## print(...) with a single argument and sys.stdout.write behave the
    ## same in Python 2 and 3 (the print statement exists only in 2)
    if self.verbose:
        print("Initiating PDBDope...")
    d = PDBDope(m)

    if name not in m.atoms.keys():

        if name in ['MS', 'AS', 'curvature', 'relAS', 'relMS']:
            if self.verbose:
                sys.stdout.write("Adding SurfaceRacer profile... ")
            d.addSurfaceRacer()

        if name in ['density']:
            if self.verbose:
                sys.stdout.write("Adding surface density... ")
            d.addDensity()

    if name not in m.residues.keys():

        if name in ['cons_abs', 'cons_max', 'cons_ent']:
            if self.verbose:
                sys.stdout.write("Adding conservation data... ")
            d.addConservation()

    if self.verbose:
        print('Done.')

    ## convert atom profiles to average residue profile
    if name in m.atoms.keys():
        aProfile = m.profile(name)

        ## residue start positions plus one closing entry
        resIdx = m.resIndex().tolist()
        resIdx += [m.lenAtoms()]

        prof = [N0.average(N0.take(aProfile, range(start, end)))
                for start, end in zip(resIdx[:-1], resIdx[1:])]
    else:
        prof = m.profile(name)

    return prof
def __setAll_1D( self, a ):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.
    Only positions whose value differs from the default value are stored.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    if isinstance( a, (list, tuple) ):
        a = N0.array( a, self.__typecode )

    if self.shape != a.shape:
        ## call form of raise is valid in Python 2 and 3
        raise SparseArrayError( 'dimensions not aligned' )

    ## positions holding a non-default value
    self.indices = N0.nonzero( N0.logical_not( N0.equal(a, self.__default) ) )
    self.indices = self.indices.tolist()

    self.values = N0.take( a, self.indices )
    self.values = self.values.tolist()
def __setAll_1D(self, a):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.
    Positions equal to the default value are not stored.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    if isinstance(a, (list, tuple)):
        a = N0.array(a, self.__typecode)

    if self.shape != a.shape:
        ## call form of raise works in Python 2 as well as 3
        raise SparseArrayError('dimensions not aligned')

    ## remember where the non-default values sit ...
    is_set = N0.logical_not(N0.equal(a, self.__default))
    self.indices = N0.nonzero(is_set)
    self.indices = self.indices.tolist()

    ## ... and the values themselves
    self.values = N0.take(a, self.indices)
    self.values = self.values.tolist()
def valuesOf( self, infoKey, version=None, default=None,
              indices=None, unique=0 ):
    """
    Get all values of a certain info record of all or some Complexes.

    @param infoKey: key for info dict
    @type  infoKey: str
    @param version: index in history or None (=current) (default: None)
    @type  version: int
    @param default: default value if infoKey is not found (default: None)
    @type  default: any
    @param indices: list of int OR None(=all), indices of Complexes
                    (default: None)
    @type  indices: [int] OR None
    @param unique: report each value only once (set union), (default: 0)
    @type  unique: 1|0

    @return: list of values; with unique=1 in order of first occurrence
    @rtype: [any]
    """
    l = self

    if indices is not None:
        l = N0.take( l, indices )

    if not unique:
        if version is None:
            return [ c.get(infoKey, default) for c in l ]
        return [ c[version].get( infoKey, default) for c in l ]

    r = []
    for c in l:
        if version is not None:
            c = c[ version ]

        ## append exactly the value that was tested, default included
        value = c.info.get( infoKey, default )
        if value not in r:
            r.append( value )

    return r
def thin(self, step=1):
    """
    Keep only each step'th frame of every ensemble member (n_members
    members).

    @param step: 1..keep all frames, 2..skip first and every second, ..
                 (default: 1)
    @type  step: int

    @return: reduced EnsembleTraj
    @rtype: EnsembleTraj
    """
    T.ensure(step, int, forbidden=[0])

    ## n_members x frames-per-member, frame indices of each member
    per_member = N0.array(
        [self.memberIndices(i) for i in range(self.n_members)])

    ## column positions step-1, 2*step-1, .. of every member
    n_columns = N0.shape(per_member)[1]
    keep = range(-1, n_columns, step)[1:]
    per_member = N0.take(per_member, keep, 1)

    ## back to interleaved order: all members at time 0, then time 1, ..
    frame_indices = N0.ravel(N0.transpose(per_member))

    return self.takeFrames(frame_indices)
def reduceAtomProfiles(self, from_model, to_model):
    """
    reduce all atom profiles according to the calculated map by
    calculating the average over the grouped atoms. Profiles that cannot
    be averaged are skipped; their info record is transferred
    nevertheless.

    @param from_model: model
    @type  from_model: PDBModel
    @param to_model: model
    @type  to_model: PDBModel
    """
    for profname in from_model.atoms:

        p0 = from_model.atoms.get(profname)
        info = from_model.profileInfo(profname)

        try:
            pr = [N0.average(N0.take(p0, group)) for group in self.groups]

            to_model.atoms.set(profname, pr)
        except Exception:
            ## best effort: skip this profile; KeyboardInterrupt and
            ## SystemExit are no longer swallowed
            pass

        to_model.atoms.setInfo(profname, **info)
def confidenceInterval(self, level):
    """
    confidenceInterval(self, level)

    @param level: confidence level (e.g. 0.68 for stdev interval)
    @type  level: float

    @return: start and end of the confidence interval(s) containing
             |level|*100 % of the probability; one (start, end) pair
             per contiguous stretch of x
    @rtype: ((float, float), ..)
    """
    ## positions of p, least probable first
    order = N0.argsort(self.p).tolist()

    ## probability accumulated from the least probable position upwards
    sorted_p = N0.take(self.p, order)
    cumulative = N0.add.accumulate(sorted_p) * self.delta_x

    ## everything from the first position reaching 1 - level is kept
    reached = N0.greater_equal(cumulative, 1. - level)
    ind = N0.nonzero(reached)
    first = ind[0]
    sub_set = order[first:]

    intervals = self.__find_intervals(sub_set)

    boundaries = []
    for i in intervals:
        boundaries.append((self.x[i[0]], self.x[i[-1]]))

    return tuple(boundaries)
def findConfidenceInterval(self, x):
    """
    findConfidenceInterval(self, x)
    Find the smallest possible density interval that still includes x.

    @param x: value
    @type  x: float

    @return: confidence level, interval start and end (one pair per
             contiguous stretch of x)
    @rtype: float, ((float,float), ..)
    """
    ## grid point nearest to x
    closest = N0.argmin(abs(self.x - x))

    ## all grid points that are at least as probable as that one
    threshold = self.p[closest]
    at_least = N0.greater_equal(self.p, threshold)
    ind = N0.nonzero(at_least).tolist()

    intervals = self.__find_intervals(ind)

    ## probability covered by each stretch
    levels = []
    for i in intervals:
        levels.append(N0.sum(N0.take(self.p, i)))
    level = N0.sum(levels) * self.delta_x

    boundaries = tuple([(self.x[i[0]], self.x[i[-1]]) for i in intervals])

    return level, boundaries
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emmission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or shorter than
                       10 (as reported by T.fileLength)
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )

    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    ## read result; the file is closed even if reading fails
    hmm = open( self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    def fields( pattern ):
        ## whitespace-separated fields of the first match of pattern in
        ## the file content (IndexError if there is no match)
        return re.findall( pattern, out )[0].split()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int( fields( r'LENG\s+[0-9]+' )[1] )
    profileDic['accession'] = fields( r'ACC\s+PF[0-9]+' )[1]
    profileDic['NrSeq'] = int( fields( r'NSEQ\s+[0-9]+' )[1] )
    profileDic['AA'] = fields( 'HMM[ ]+' + '[A-Y][ ]+'*20 )[1:]

    ## collect null emmission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
    nullEmm = [ float(j) for j in fields( pattern )[1:] ]

    ## get emmision scores: one row per profile position, the first
    ## column is the position number, followed by 20 scores
    prob=[]
    for i in range(1, profileDic['profLength']+1):
        pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
        prob.append( [ float(j) for j in fields( pattern ) ] )

    profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N0.array(prob)[:,1:]

    ## calculate emission probablitities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0]
            for e in emmProb ]
    profileDic['ent'] = N0.array(ent)

    ###### TEST #####

    proba = N0.array(prob)[:,1:]

    # test set all to N0.sum( abs( probabilities ) )
    p2 = []
    for p in proba:
        p2.append( N0.resize( N0.sum( N0.absolute( p )), N0.shape( p ) ) )
    profileDic['absSum'] = p2

    # set all to normalized max score
    ## NOTE(review): 'math' must provide SD() here, which the standard
    ## library module does not -- presumably a project module is imported
    ## under this name; confirm against the file's imports
    p4 = []
    for p in proba:
        p_scale = (p - N0.average(p) )/ math.SD(p)
        p4.append( N0.resize( p_scale[N0.argmax( N0.array(p_scale) )] ,
                              N0.shape( p ) ) )
    profileDic['maxAllScale'] = p4

    return profileDic
def parse_result(self):
    """
    Extract some information about the profile as well as the
    match state emmission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or shorter than
                       10 (as reported by T.fileLength)
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out)

    if T.fileLength(self.f_out) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out)

    ## read result; make sure the file gets closed if reading fails
    hmm = open(self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    def first_match(pattern):
        ## first match of pattern in the file content, split at
        ## whitespace (IndexError if the pattern does not match)
        return re.findall(pattern, out)[0].split()

    ## collect some data about the hmm profile
    profLength = int(first_match(r'LENG\s+[0-9]+')[1])

    profileDic = {}
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = profLength
    profileDic['accession'] = first_match(r'ACC\s+PF[0-9]+')[1]
    profileDic['NrSeq'] = int(first_match(r'NSEQ\s+[0-9]+')[1])
    profileDic['AA'] = first_match('HMM[ ]+' + '[A-Y][ ]+'*20)[1:]

    ## collect null emmission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
    nullEmm = [float(j) for j in first_match(pattern)[1:]]

    ## get emmision scores; every row starts with the position number
    prob = []
    for i in range(1, profLength + 1):
        pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
        prob.append([float(j) for j in first_match(pattern)])

    profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
    profileDic['emmScore'] = N0.array(prob)[:, 1:]

    ## calculate emission probablitities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    ent = [N0.resize(self.entropy(e, nullProb), (1, 20))[0]
           for e in emmProb]
    profileDic['ent'] = N0.array(ent)

    ###### TEST #####

    proba = N0.array(prob)[:, 1:]

    # test set all to N0.sum( abs( probabilities ) )
    profileDic['absSum'] = [
        N0.resize(N0.sum(N0.absolute(row)), N0.shape(row))
        for row in proba]

    # set all to normalized max score
    ## NOTE(review): 'math' has to offer SD(); the standard library
    ## module does not -- presumably a project module is imported under
    ## this name; confirm against the file's imports
    p4 = []
    for row in proba:
        p_scale = (row - N0.average(row)) / math.SD(row)
        p4.append(N0.resize(p_scale[N0.argmax(N0.array(p_scale))],
                            N0.shape(row)))
    profileDic['maxAllScale'] = p4

    return profileDic
def getFluct_local( self, mask=None, border_res=1,
                    left_atoms=['C'], right_atoms=['N'], verbose=1 ):
    """
    Get mean displacement of each atom from it's average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated (default: None, all atoms)
    @type  mask: array
    @param border_res: number of neighboring residues to use for fitting
    @type  border_res: int
    @param left_atoms: atoms (names) to use from these neighbore residues
    @type  left_atoms: [str]
    @param right_atoms: atoms (names) to use from these neighbore residues
    @type  right_atoms: [str]
    @param verbose: write progress to the error stream (default: 1)
    @type  verbose: 1|0

    @return: list of float, one value per atom of every selected
             residue; 0 for residues that caused a ZeroDivisionError
    @rtype: [float]

    @raise PDBError: if a selected residue has no atoms
    """
    ## NOTE: the list defaults above are only read, never modified here
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    if verbose: T.errWrite( "rmsd fitting per residue..." )

    residues = N0.nonzero( self.ref.atom2resMask( mask ) )

    ## backbone atoms used for fit
    fit_atoms_right = N0.nonzero( self.ref.mask( right_atoms ) )
    fit_atoms_left  = N0.nonzero( self.ref.mask( left_atoms ) )

    ## chain index of each residue
    rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                           fit_atoms_left, fit_atoms_right)

        try:
            if not len( i_res ):
                ## call form of raise is valid in Python 2 and 3
                raise PDBError( 'empty residue' )

            t_res = self.takeAtoms( i_res + i_border )

            ## the center residue's atoms come first in t_res
            i_center = range( len( i_res ) )

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )
            ## .. but calculate only with center residue atoms
            frames = N0.take( t_res.frames, i_center, 1 )

            avg = N0.average( frames )

            rmsd = N0.average(N0.sqrt(N0.sum(N0.power(frames - avg, 2), 2) ))

            result.extend( rmsd )

            if verbose: T.errWrite('#')

        except ZeroDivisionError:
            ## report the residue (regardless of verbose) and fill in zeros
            result.extend( N0.zeros( len(i_res), N0.Float32 ) )
            T.errWrite('?' + str( res ))

    if verbose: T.errWriteln( "done" )

    return result