def castHmmDic(self, hmmDic, repete, hmmGap, key):
    """
    Expand one score profile of hmmDic to cover all repetes of the HMM
    profile and drop the gap positions of every repete.

    For each repete i the positions listed in hmmGap[i] are removed from
    the original profile hmmDic[key]. Every new gap-free copy is put IN
    FRONT of the copies collected so far, i.e. the copy of the last
    repete ends up first. hmmDic is modified in place.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repete information from L{align}; must be >= 1,
                   otherwise no score is ever assigned and the call fails
    @type  repete: int
    @param hmmGap: information about gaps from L{align}; hmmGap[i] holds
                   the profile positions to skip in repete i
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repetes
    @type  key: str

    @return: dictionary with information about the profile (same object
             as hmmDic)
    @rtype: dict
    """
    profile = hmmDic[key]

    for rep in range(repete):
        ## 1 = keep position, 0 = position is a gap in this repete
        keep = N0.ones(len(profile))
        N0.put(keep, hmmGap[rep], 0)
        segment = N0.compress(keep, profile, 0)

        if rep:
            ## later repetes are prepended to what we have so far
            score = N0.concatenate((segment, score))
        else:
            score = segment

    hmmDic[key] = score
    return hmmDic
def mergeProfiles(self, p0, p1, maxOverlap=3):
    """
    Merge profile p0 with profile p1, as long as they overlap in at most
    maxOverlap positions. A position counts as overlapping if BOTH
    profiles have a value > 0 there. Overlapping positions are set to 0
    in the merged profile. If there are more than maxOverlap overlapping
    positions, p0 is returned without merging.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: merged profile (or converted p0 if the overlap is too large)
    @rtype: array
    """
    p0 = self.__list2array(p0)
    p1 = self.__list2array(p1)

    ## Positions where both profiles are positive. logical_and is used
    ## rather than adding the two comparisons and testing for "> 1":
    ## with boolean arrays True + True is still True, so the sum-based
    ## test never reports any overlap.
    overlap = N0.logical_and(N0.greater(p0, 0), N0.greater(p1, 0))

    if N0.sum(overlap) <= maxOverlap:
        ## one of the two profiles will in most cases not belong to these
        ## positions. We can't decide which one is wrong, let's eliminate
        ## both values. Alternatively we could keep one, or the average, ..
        ## NOTE(review): put() works in place -- if __list2array hands
        ## back the caller's own array, that array is modified too.
        clash = N0.nonzero(overlap)
        N0.put(p1, clash, 0)
        N0.put(p0, clash, 0)

        p0 = p0 + p1

    return p0
def loadResContacts(self):
    """
    Uncompress residue contact matrix if necessary.

    Three cases are handled:
      - self.contacts is a string: it is passed to t.load and replaced
        by the loaded object (old-style, backwards compatibility)
      - self.contacts['result'] is 1-dimensional: it is taken as a list
        of indices into the raveled contact matrix and expanded into a
        lenRec x lenLig matrix of 0 and 1
      - anything else (including None) is returned unchanged

    @return: dict with contact matrix and parameters OR None
    @rtype: dict OR None
    """
    if self.contacts is None:
        return self.contacts

    ## Backwards compatibility
    if isinstance(self.contacts, str):
        ## NOTE(review): the warning below suggests that t.load
        ## un-pickles the file -- only use with trusted files.
        self.contacts = t.load(self.contacts)
        EHandler.warning("loading old-style pickled contacts.")
        return self.contacts

    ## New, uncompression from list of indices into raveled array
    if len(N0.shape(self.contacts['result'])) == 1:

        try:
            lenRec, lenLig = self.contacts['shape']
        except (KeyError, TypeError, ValueError):
            ## shape missing or not a pair -> fall back to the models
            EHandler.warning("uncompressing contacts without shape")
            lenRec = self.rec().lenResidues()
            lenLig = self.lig().lenResidues()

        m = N0.zeros(lenRec * lenLig)
        N0.put(m, self.contacts['result'], 1)

        self.contacts['result'] = N0.reshape(m, (lenRec, lenLig))

    return self.contacts
def __unmaskedMatrix(self, contacts, rec_mask, lig_mask):
    """
    Blow a contact matrix between selected rec and lig atoms up to the
    matrix of all atoms. Atom pairs outside the two masks get 0.

    @param contacts: contact matrix, array sum_rec_mask x sum_lig_mask
    @type  contacts: array
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]

    @return: atom contact matrix, array N_atoms_rec x N_atoms_lig
    @rtype: array
    """
    n_rec = len(self.rec_model)
    n_lig = len(self.lig_model)

    ## flat mask of all rec-lig atom pairs that are part of contacts
    pair_mask = N0.ravel(N0.outerproduct(rec_mask, lig_mask))

    ## write the given contacts to the selected positions of an
    ## all-atom array (nonzero is the time consuming step)
    flat = N0.zeros(n_rec * n_lig)
    N0.put(flat, N0.nonzero(pair_mask), N0.ravel(contacts))

    return N0.resize(flat, (n_rec, n_lig))
def blockFit(self, ref=None, mask=None):
    """
    RMSD-fit the average of each member trajectory (i.e. the trajectory
    en block) onto the overall average (default) or a given structure.
    The frames of self are replaced by the transformed frames.

    @param ref: reference structure (default: average structure)
    @type  ref: PDBModel
    @param mask: atoms to consider (default: None, all heavy)
    @type  mask: [1|0] OR None
    """
    if ref is None:
        ref = self.avgModel()

    for m in range(self.n_members):

        indices = self.memberIndices(m)

        ## get a copy of this member's Trajectory
        traj = self.takeFrames(indices)

        ## fit the member average, apply the same transformation to
        ## all frames of the member
        m_avg = traj.avgModel()
        r, t = m_avg.transformation(ref, mask)
        traj.transform(r, t)

        ## replace original frames of this member, frame by frame.
        ## put() would treat indices as positions in the FLATTENED
        ## array and hence overwrite single values rather than frames.
        ## (assumes self.frames is indexed by frame along its first axis)
        for i, frame in zip(indices, traj.frames):
            self.frames[i] = frame
def mergeProfiles( self, p0, p1, maxOverlap=3 ):
    """
    Merge profile p0 with profile p1, as long as they overlap in at most
    maxOverlap positions. Positions where both profiles are > 0 are
    considered overlapping and are zeroed in the merged profile. With
    more than maxOverlap overlapping positions nothing is merged and
    p0 is returned.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: merged profile (or converted p0 if the overlap is too large)
    @rtype: array
    """
    p0 = self.__list2array( p0 )
    p1 = self.__list2array( p1 )

    ## both profiles populated? Don't add the two comparison results and
    ## test for "> 1" -- boolean arrays add up to True, never to 2.
    in_p0 = N0.greater( p0, 0 )
    in_p1 = N0.greater( p1, 0 )
    overlap = N0.logical_and( in_p0, in_p1 )

    if N0.sum( overlap ) > maxOverlap:
        return p0

    ## one of the two profiles will in most cases not belong to these
    ## positions. We can't decide which one is wrong, let's eliminate
    ## both values. Alternatively we could keep one, or the average, ..
    ## NOTE(review): in-place change; affects the caller's arrays if
    ## __list2array does not copy.
    i_overlap = N0.nonzero( overlap )
    N0.put( p1, i_overlap, 0 )
    N0.put( p0, i_overlap, 0 )

    return p0 + p1
def castHmmDic( self, hmmDic, repete, hmmGap, key ):
    """
    Blow up the profile hmmDic[key] to the number of repetes of the
    profile used and remove, for each repete, the positions that are
    deleted in the search sequence. The entry hmmDic[key] is replaced.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repete information from L{align} (has to be >= 1,
                   there is no result to assign otherwise)
    @type  repete: int
    @param hmmGap: information about gaps from L{align}, one collection
                   of gap positions per repete
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repetes
    @type  key: str

    @return: dictionary with information about the profile
    @rtype: dict
    """
    s = hmmDic[key]

    def gapless( i ):
        ## copy of the profile without the gap positions of repete i
        mask = N0.ones( len(s) )
        N0.put( mask, hmmGap[i], 0 )
        return N0.compress( mask, s, 0 )

    for i in range( repete ):
        if i == 0:
            score = gapless( i )
        else:
            ## new copy goes to the front of the growing profile
            score = N0.concatenate( ( gapless( i ), score ) )

    hmmDic[key] = score

    return hmmDic
def shuffledLists( self, n, lst, mask=None ):
    """
    Shuffle order of a list n times, leaving masked(0) elements untouched.

    @param n: number of times to shuffle the list
    @type  n: int
    @param lst: list to shuffle
    @type  lst: [any]
    @param mask: mask to be applied to lst; only positions with a
                 non-zero mask value are shuffled
                 (default: None, shuffle all positions)
    @type  mask: [1|0]

    @return: list of shuffled lists (arrays)
    @rtype: [[any]]
    """
    ## "not mask" is avoided on purpose: it is ambiguous for arrays and
    ## would mistake an all-zero mask for a missing one.
    if mask is None or len( mask ) == 0:
        mask = N0.ones( len(lst) )

    if isinstance( lst, list ):
        lst = N0.array( lst )

    ## positions that take part in the shuffling
    pos = N0.nonzero( mask )

    rand_pos = N0.array( [ self.__shuffleList( pos ) for i in range(n) ] )

    ## the values to redistribute are the same for every round
    values = N0.take( lst, pos )

    result = []
    for p in rand_pos:
        r = copy.copy( lst )
        N0.put( r, p, values )
        result.append( r )

    return result
def patchAround( self, center, nAtoms ):
    """
    patchAround( float_center, int_nAtoms ) -> mask for self.model

    Select the nAtoms atoms with the smallest distance to center and
    pass this patch through L{centerPatch}.
    """
    ## atom indices, closest atom first
    closest = N0.argsort( self.__distances( center ) )

    patch = N0.zeros( len( self.model ), 'i' )
    N0.put( patch, closest[:nAtoms], 1 )

    return self.centerPatch( patch )
def patchAround(self, center, nAtoms):
    """
    patchAround( float_center, int_nAtoms ) -> mask for self.model

    Create single patch of the nAtoms atoms that are closest to center.
    The raw patch is handed to L{centerPatch}, whose result is returned.
    """
    distances = self.__distances(center)
    ranking = N0.argsort(distances)

    ## mark the nAtoms top-ranking (closest) atoms
    mask = N0.zeros(len(self.model), 'i')
    N0.put(mask, ranking[:nAtoms], 1)

    centered = self.centerPatch(mask)
    return centered
def __inverseIndices(self, model, i_atoms):
    """
    Get the complement of an atom selection.

    @param model: model
    @type  model: PDBModel
    @param i_atoms: atom index
    @type  i_atoms: [int]

    @return: remaining atom indices of model that are NOT in i_atoms
    @rtype: [int]
    """
    ## 1 for every atom listed in i_atoms, 0 for all others
    selected = N0.zeros(len(model), N0.Int)
    N0.put(selected, i_atoms, 1)

    remaining = N0.logical_not(selected)
    return N0.nonzero(remaining)
def centerPatch( self, patch_mask ):
    """
    Replace a patch by a patch of the same number of atoms around the
    geometric center of the original one.

    patch_mask - [ 1|0 ], mask of non-centered patch
    -> [ 1|0 ], mask of patch around geometric center of first patch
    """
    center = self.model.center( patch_mask )
    distances = self.__distances( center )

    ## size of the original patch
    size = len( N0.nonzero( patch_mask ) )

    ## the same number of atoms, but now the ones closest to the center
    closest = N0.argsort( distances )[:size]

    centered = N0.zeros( len( patch_mask ) )
    N0.put( centered, closest, 1 )

    return centered
def centerPatch(self, patch_mask):
    """
    Center a patch: take the geometric center of the given patch and
    select as many atoms around it as the given patch contains.

    patch_mask - [ 1|0 ], mask of non-centered patch
    -> [ 1|0 ], mask of patch around geometric center of first patch
    """
    dist = self.__distances(self.model.center(patch_mask))

    n_selected = len(N0.nonzero(patch_mask))
    nearest = N0.argsort(dist)[:n_selected]

    ## new mask of the same length, 1 for the nearest atoms
    new_mask = N0.zeros(len(patch_mask))
    N0.put(new_mask, nearest, 1)
    return new_mask
def toarray( self ):
    """
    Reconstruct dense array::
      L.toarray() -> numpy.array, normal dense array

    The array is first filled with the default value; the non-default
    values are then written to their positions in the flattened array.

    @return: normal dense array
    @rtype: array
    """
    default = self.default()

    ## compare by value: "is 0" is an identity test that misses 0.0 or
    ## array scalars (and is only by accident true for the int 0)
    if default == 0:
        a = N0.zeros( ( self.shape ), self.typecode() )
    else:
        a = N0.ones( (self.shape ), self.typecode() ) * default

    N0.put( a, self.nondefault(), self.nondefault_values() )

    return a
def toarray(self):
    """
    Reconstruct dense array::
      L.toarray() -> numpy.array, normal dense array

    Creates an array of self.shape and self.typecode() that holds the
    default value everywhere except at the non-default positions
    (positions refer to the flattened array).

    @return: normal dense array
    @rtype: array
    """
    default = self.default()

    ## value comparison instead of "is 0" -- identity with an int
    ## literal fails for 0.0 and other zero-like values
    if default == 0:
        a = N0.zeros((self.shape), self.typecode())
    else:
        a = N0.ones((self.shape), self.typecode()) * default

    N0.put(a, self.nondefault(), self.nondefault_values())

    return a
def random_contacts( self, contMat, n, maskRec=None, maskLig=None ):
    """
    Create randomized surface contact matrix with same number of
    contacts and same shape as given contact matrix. Random contacts
    are only placed at positions allowed by both masks.

    @param contMat: template contact matrix
    @type  contMat: matrix
    @param n: number of matrices to generate
    @type  n: int
    @param maskRec: surface masks (or something similar)
                    (default: None, all rows allowed)
    @type  maskRec: [1|0]
    @param maskLig: surface masks (or something similar)
                    (default: None, all columns allowed)
    @type  maskLig: [1|0]

    @return: list of [n] random contact matricies
    @rtype: [matrix]
    """
    a,b = N0.shape( contMat )
    nContacts = N0.sum( N0.sum( contMat ))

    ## Each mask gets its own default. Testing "not maskLig" is
    ## ambiguous for arrays and used to discard a given maskRec
    ## whenever maskLig was missing.
    if maskRec is None:
        maskRec = N0.ones( a )
    if maskLig is None:
        maskLig = N0.ones( b )

    ## positions in the raveled matrix that are allowed by both masks
    c_mask = N0.ravel( N0.outerproduct( maskRec, maskLig ) )
    c_pos = N0.nonzero( c_mask )

    # number of allowed positions
    length = len( c_pos )

    result = []

    for i in range( n ):
        # create random array
        ranCont = mathUtils.randomMask( nContacts,length )

        # blow up to size of original matrix
        r = N0.zeros(a*b)
        N0.put( r, c_pos, ranCont)

        result.append( N0.reshape( r, (a,b) ) )

    return result
def memberMask(self, member):
    """
    Get mask for all frames belonging to a given ensemble member
    (or to any of several members).

    @param member: member index starting with 0, or a list / tuple of
                   member indices
    @type  member: int OR [int]

    @return: member mask, N0.array( N_frames x 1) of 1||0; all 0 if
             member is neither an int nor a list / tuple
    @rtype: [1|0]
    """
    result = N0.zeros(self.lenFrames())

    ## builtin types instead of types.IntType / types.ListType, which
    ## only exist in Python 2
    if isinstance(member, int):
        members = [member]
    elif isinstance(member, (list, tuple)):
        members = member
    else:
        members = []

    for m in members:
        N0.put(result, self.memberIndices(m), 1)

    return result
def memberMap(self, traj):
    """
    Assign each frame of traj to the member trajectory it belongs to.

    @param traj: Trajectory
    @type  traj: Trajectory

    @return: member index of each frame OR None if traj is
             not a EnsembleTraj
    @rtype: [ int ] OR None
    """
    if isinstance(traj, EnsembleTraj):

        owner = N0.zeros(len(traj), N0.Int)

        ## stamp the frames of every member with the member's index
        for member in range(traj.n_members):
            N0.put(owner, traj.memberIndices(member), member)

        return owner.tolist()

    return None
def memberMap(self, traj):
    """
    Tell which traj frame belongs to which member trajectory.

    @param traj: Trajectory
    @type  traj: Trajectory

    @return: list with one member index per frame of traj OR None if
             traj is not a EnsembleTraj
    @rtype: [ int ] OR None
    """
    is_ensemble = isinstance( traj, EnsembleTraj )
    if not is_ensemble:
        return None

    frame2member = N0.zeros( len(traj), N0.Int )

    for i_member in range( traj.n_members ):
        ## frames of this member get the member index as value
        frames = traj.memberIndices( i_member )
        N0.put( frame2member, frames, i_member )

    return frame2member.tolist()
def compareSequences(seqAA_1, seqAA_2):
    """
    Compare two sequences and create one mask per sequence. A mask has
    the length of the original sequence and is 1 at the positions that
    are left over after the two filter rounds below, 0 elsewhere.

    Round 1 removes the positions reported by getSkipLists (expanded by
    expandRepeats), round 2 keeps what getEqualLists / getEqual report
    for the remaining sequences.

    @param seqAA_1: first sequence
    @type  seqAA_1: sequence (str or list)
    @param seqAA_2: second sequence
    @type  seqAA_2: sequence (str or list)

    @return: mask for sequence 1, mask for sequence 2
    @rtype: array, array
    """
    aa_1, aa_2 = list(seqAA_1), list(seqAA_2)

    ## remember the original lengths for the final masks
    n_1, n_2 = len(aa_1), len(aa_2)

    ## original position of every residue, filtered along with aa_x
    nr_1, nr_2 = range(n_1), range(n_2)

    ## round 1: throw out the positions that are to be skipped
    skip_1, skip_2 = getSkipLists(getOpCodes(aa_1, aa_2))

    skip_1 = [expandRepeats(aa_1, *pos) for pos in skip_1]
    skip_2 = [expandRepeats(aa_2, *pos) for pos in skip_2]

    keep_1 = del2mask(aa_1, *skip_1)
    keep_2 = del2mask(aa_2, *skip_2)

    aa_1 = N0.compress(keep_1, aa_1).tolist()
    nr_1 = N0.compress(keep_1, nr_1).tolist()
    aa_2 = N0.compress(keep_2, aa_2).tolist()
    nr_2 = N0.compress(keep_2, nr_2).tolist()

    ## round 2: keep only the equal parts of what is left
    equal_1, equal_2 = getEqualLists(getOpCodes(aa_1, aa_2))

    aa_1, nr_1 = getEqual(aa_1, nr_1, equal_1)
    aa_2, nr_2 = getEqual(aa_2, nr_2, equal_2)

    ## translate the surviving original positions into masks
    mask_1 = N0.zeros(n_1)
    mask_2 = N0.zeros(n_2)
    N0.put(mask_1, nr_1, 1)
    N0.put(mask_2, nr_2, 1)

    return mask_1, mask_2
def go(self, model_list=None, reference=None):
    """
    Run benchmarking: load models and reference, fit every model to the
    reference, store the per-atom profile 'rmsd2ref_if' in each model
    and write out the rmsd reports and the updated models.

    @param model_list: list of models
                       (default: None S{->} outFolder/L{F_PDBModels})
    @type  model_list: ModelList
    @param reference: reference model
                     (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
    @type  reference: PDBModel
    """
    model_list = model_list or self.outFolder + self.F_PDBModels
    reference = reference or self.outFolder + self.F_INPUT_REFERENCE

    pdb_list = T.load('%s' % model_list)
    reference = PDBModel(reference)

    # check with python 2.4
    iref, imodel = reference.compareAtoms(pdb_list[0])

    ## mask of the model atoms that have a partner in the reference
    mask_casting = N0.zeros(len(pdb_list[0]))
    N0.put(mask_casting, imodel, 1)

    reference = reference.take(iref)

    ## atom mask derived from the residue profile "n_templates"
    rmask = pdb_list[0].profile2mask("n_templates", 1, 1000)
    amask = pdb_list[0].res2atomMask(rmask)

    ## mask_final applies to model atoms, mask_final_ref to the atoms
    ## of the (already reduced) reference
    mask_final_ref = N0.compress(mask_casting, amask)
    mask_final = mask_casting * amask

    reference = reference.compress(mask_final_ref)

    for i in range(len(pdb_list)):

        pdb_list[i], pdb_wo_if = self.output_fittedStructures(
            pdb_list[i], reference, i, mask_final)

        fitted_model_if = pdb_list[i].compress(mask_final)
        fitted_model_wo_if = pdb_wo_if.compress(mask_final)

        coord1 = reference.getXyz()
        coord2 = fitted_model_if.getXyz()

        aprofile = self.rmsd_res(coord1, coord2)

        self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                       reference, pdb_list[i])

        pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                              mask=mask_final, default=-1,
                              comment="rmsd to known reference structure")

    self.output_rmsd_aa(pdb_list)
    self.output_rmsd_ca(pdb_list)
    self.output_rmsd_res(pdb_list)

    self.write_PDBModels(pdb_list)
def go(self, model_list = None, reference = None):
    """
    Run benchmarking. Every model of the list is fitted to the
    reference structure, gets the atom profile 'rmsd2ref_if' assigned,
    and the rmsd reports as well as the modified models are written.

    @param model_list: list of models
                       (default: None S{->} outFolder/L{F_PDBModels})
    @type  model_list: ModelList
    @param reference: reference model
                     (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
    @type  reference: PDBModel
    """
    model_list = model_list or self.outFolder + self.F_PDBModels
    reference = reference or self.outFolder + self.F_INPUT_REFERENCE

    pdb_list = T.load('%s'%model_list)
    reference = PDBModel(reference)

    # check with python 2.4
    iref, imodel = reference.compareAtoms(pdb_list[0])

    ## 1 for each atom of the first model that is matched in the
    ## reference (a second, never used copy of this mask was removed)
    mask_casting = N0.zeros(len(pdb_list[0]))
    N0.put(mask_casting, imodel, 1)

    reference = reference.take(iref)

    rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
    amask = pdb_list[0].res2atomMask(rmask)

    ## restrict both model (mask_final) and reference (mask_final_ref)
    ## to matched atoms that also pass the n_templates mask
    mask_final_ref = N0.compress(mask_casting, amask)
    mask_final = mask_casting * amask

    reference = reference.compress(mask_final_ref)

    for i in range(len(pdb_list)):

        ## the fitted model replaces the original list entry
        pdb_list[i], pdb_wo_if = self.output_fittedStructures(
            pdb_list[i], reference, i, mask_final)

        fitted_model_if = pdb_list[i].compress(mask_final)
        fitted_model_wo_if = pdb_wo_if.compress(mask_final)

        coord1 = reference.getXyz()
        coord2 = fitted_model_if.getXyz()

        aprofile = self.rmsd_res(coord1,coord2)

        self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                       reference, pdb_list[i])

        pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                              mask=mask_final, default = -1,
                              comment="rmsd to known reference structure")

    self.output_rmsd_aa(pdb_list)
    self.output_rmsd_ca(pdb_list)
    self.output_rmsd_res(pdb_list)

    self.write_PDBModels(pdb_list)