def removeAtoms( self, what ):
    """
    Delete the selected atoms from every frame of the trajectory and
    from the reference structure.

    @param what: selection of the atoms to remove, one of::
          - function( atom_dict ) -> 1 || 0 or (1..remove)
          - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
          - list of int [11111100001101011100..N_atoms], mask (1..remove)
          - int, remove atom with this index
    @type  what: any

    @return: N.array(1 x N_atoms_old) of 0||1, the mask that was used to
             compress the atoms and xyz arrays (1..atom is kept). The
             same mask can be applied to any other array that has the
             dimension of the old(!) xyz and atoms.
    @rtype: array
    """
    ## atomMask() flags the atoms to remove; invert it to get the keepers
    to_remove = self.atomMask( what )
    mask = N.logical_not( to_remove )

    kept_indices = N.nonzero( mask )
    self.keepAtoms( kept_indices )

    return mask
def removeAtoms(self, what):
    """
    Strip atoms from all frames of the trajectory and from the reference
    structure.

    @param what: which atoms to remove, given as one of::
          - function( atom_dict ) -> 1 || 0 or (1..remove)
          - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
          - list of int [11111100001101011100..N_atoms], mask (1..remove)
          - int, remove atom with this index
    @type  what: any

    @return: N.array(1 x N_atoms_old) of 0||1; the (inverted) selection
             mask that was used to compress the atoms and xyz arrays.
             It can be re-used on another array with the same dimension
             as the old(!) xyz and atoms.
    @rtype: array
    """
    ## the selection marks atoms to drop -> the keep-mask is its negation
    keep = N.logical_not(self.atomMask(what))

    ## keepAtoms works on indices rather than on a mask
    self.keepAtoms(N.nonzero(keep))

    return keep
def stable_sd(x, n_sd=3., min_length=20):
    """
    Standard deviation with iterative exclusion of outliers.

    Samples shorter than min_length are not filtered: a single value gives
    0., anything else the plain standardDeviation(x). Longer samples are
    refined in at most 10 rounds. Each round calculates the median of the
    values currently kept and their standardDeviation (the median is handed
    over as second argument), then flags every value of x that lies more
    than n_sd of these deviations away from the median. The refinement ends
    as soon as nothing is flagged or the flags did not change.

    x          -- sequence of numbers
    n_sd       -- outlier threshold in units of the standard deviation
    min_length -- smallest sample size for which outliers are excluded

    Returns the standard deviation calculated in the last round.
    """
    if len(x) < min_length:
        if len(x) == 1:
            return 0.
        return standardDeviation(x)

    x = Numeric.array(x)

    kept = x            # values that enter the next estimate
    previous = 0.       # outlier flags of the preceding round

    for _round in range(10):
        mu = median(kept)
        sd = standardDeviation(kept, mu)

        flagged = Numeric.greater(abs(x - mu), n_sd * sd)

        # done: no outliers at all ...
        if not Numeric.sum(flagged):
            break
        # ... or exactly the same outliers as in the round before
        if Numeric.sum(flagged == previous) == len(x):
            break

        kept = Numeric.compress(Numeric.logical_not(flagged), x)
        previous = flagged

    return sd
def getBuriedSurfaceTriangles(self, atomIndices=None, component=0, selnum=1, negate=0):
    """vfloat, vint, tri = getBuriedSurfaceTriangles(atomIndices=None, component=0, selnum=1, negate=0)

    Select the triangles of the given SES component that have at least
    'selnum' vertices which are buried (negate=0) or not buried (negate=1).
    0 < selnum < 4.

    vfloat and vint are returned unchanged from getTriangles() and describe
    all vertices of the surface; tri is the selected subset of triangles.
    """
    assert selnum in (1, 2, 3)

    vfloat, vint, tri = self.getTriangles(atomIndices, component=component)

    # column 2 of the integer vertex data is used as the buried flag
    if negate:
        flags = Numeric.logical_not(vint[:, 2])
    else:
        flags = vint[:, 2]

    # flag of each of the three corner vertices of every triangle
    cornerFlags = Numeric.take(flags, tri[:, :3])
    nSelected = Numeric.sum(cornerFlags, 1)

    faceInd = Numeric.nonzero(Numeric.greater_equal(nSelected, selnum))
    return vfloat, vint, Numeric.take(tri, faceInd)
def parseReference(self, fpdb, dry_out=None ):
    """
    Read the reference PDB and record the size of the system.

    Sets self.lenres (number of residues selected by the inverted solvent
    mask; an already set, true value is left alone) and self.lenatoms
    (number of atoms not flagged by maskH2O).

    @param fpdb: name of the reference PDB file
    @type  fpdb: str
    @param dry_out: if given, the atoms flagged by maskH2O are removed
                    and the model is written to this file
    @type  dry_out: str
    """
    flushPrint( "parsing "+fpdb+"..." )

    m = PDBModel( fpdb )

    ## residues that contain non-solvent atoms
    not_solvent = logical_not( m.maskSolvent() )
    solute_res = m.atom2resMask( not_solvent )
    self.lenres = self.lenres or sum( solute_res )

    ## atom count without water
    n_water = sum( m.maskH2O() )
    self.lenatoms = len( m ) - n_water

    if dry_out:
        m.remove( m.maskH2O() )
        m.writePdb( dry_out )

    flushPrint( 'done.\n' )
def parseReference(self, fpdb, dry_out=None):
    """
    Parse the reference structure and remember residue and atom counts.

    self.lenres is only filled in if it does not yet hold a true value; it
    then receives the number of residues selected by the negated solvent
    mask. self.lenatoms receives the number of atoms minus those flagged
    by maskH2O.

    @param fpdb: reference PDB file name
    @type  fpdb: str
    @param dry_out: optional file name; if given, the model is written to
                    it after removal of the atoms flagged by maskH2O
    @type  dry_out: str
    """
    flushPrint("parsing " + fpdb + "...")
    m = PDBModel(fpdb)

    solute_res = m.atom2resMask(logical_not(m.maskSolvent()))
    self.lenres = self.lenres or sum(solute_res)

    total_atoms = len(m)
    self.lenatoms = total_atoms - sum(m.maskH2O())

    if dry_out:
        ## write a water-free copy of the reference
        m.remove(m.maskH2O())
        m.writePdb(dry_out)

    flushPrint('done.\n')
def __inverseIndices( self, model, i_atoms ):
    """
    Get the complement of a list of atom indices.

    @param model: model, only its length is used
    @type  model: PDBModel
    @param i_atoms: atom indices
    @type  i_atoms: [int]

    @return: the atom indices of model that are NOT in i_atoms
    @rtype: [int]
    """
    ## flag the given positions ...
    selected = N.zeros( len( model ), N.Int )
    N.put( selected, i_atoms, 1 )

    ## ... and report all positions that stayed unflagged
    remaining = N.logical_not( selected )
    return N.nonzero( remaining )
def __inverseIndices(self, model, i_atoms):
    """
    Invert an atom selection that is given as list of indices.

    @param model: model the indices refer to (only len(model) is used)
    @type  model: PDBModel
    @param i_atoms: indices of the selected atoms
    @type  i_atoms: [int]

    @return: indices of all atoms of model that are NOT part of i_atoms
    @rtype: [int]
    """
    ## build a 0/1 mask of the selection, then look up where it is 0
    in_selection = N.zeros(len(model), N.Int)
    N.put(in_selection, i_atoms, 1)

    return N.nonzero(N.logical_not(in_selection))
def map_angles(angles, period=None):
    """
    Map angles into the interval [-period, period] (default: [-pi, pi]).

    Positive and non-positive angles are wrapped by separate expressions
    because fmod keeps the sign of its first argument.

    angles -- number or array of angles
    period -- half width of the target interval; None selects pi

    Returns the wrapped angle(s).
    """
    # numpy.oldnumeric was removed in NumPy 1.9; the functions used here
    # are available under the same names from the numpy top level
    from numpy import fmod, greater, logical_not

    if period is None:
        from numpy import pi as period

    mask = greater(angles, 0.)

    wrapped_positive = fmod(angles + period, 2 * period) - period
    wrapped_other = fmod(angles - period, 2 * period) + period

    return mask * wrapped_positive + logical_not(mask) * wrapped_other
def map_angles(angles, period=None):
    """
    Wrap angles into [-period, period]; period defaults to pi, i.e. the
    result lies in [-pi, pi].

    angles -- number or array of angles
    period -- half width of the target interval (None -> pi)

    Returns the wrapped angle(s).
    """
    # import from numpy itself: the numpy.oldnumeric compatibility module
    # no longer exists since NumPy 1.9, the function names are unchanged
    from numpy import fmod, greater, logical_not

    if period is None:
        from numpy import pi as period

    # fmod keeps the sign of its first argument, so angles > 0 and
    # angles <= 0 need mirrored shift/unshift expressions
    mask = greater(angles, 0.)
    return mask * (fmod(angles + period, 2 * period) - period) + \
           logical_not(mask) * (fmod(angles - period, 2 * period) + period)
def __center_model(self, model):
    """
    Create a copy of a PDBModel that is translated so that its center of
    mass lies in 0,0,0. Atoms flagged by maskH2O() are removed from the
    copy before the center is calculated. The input model is not modified.

    @param model: model to center
    @type  model: PDBModel

    @return: PDBModel (clone of model)
    @rtype: PDBModel
    """
    r = model.clone()

    ## drop water
    not_water = N.logical_not(r.maskH2O())
    r.keep(N.nonzero(not_water))

    ## shift coordinates by the center of mass
    shift = r.centerOfMass()
    r.setXyz(r.getXyz() - shift)

    return r
def __center_model( self, model ):
    """
    Translate a clone of the given PDBModel so that its center of mass
    is in 0,0,0. The clone is stripped of the atoms flagged by maskH2O()
    first; the original model stays untouched.

    @param model: model to center
    @type  model: PDBModel

    @return: PDBModel (clone of model)
    @rtype: PDBModel
    """
    r = model.clone()
    r.keep( N.nonzero( N.logical_not( r.maskH2O() ) ) )

    ## move the center of mass into the origin
    center = r.centerOfMass()
    centered_xyz = r.getXyz() - center
    r.setXyz( centered_xyz )

    return r
def test_ReduceCoordinates(self):
    """ReduceCoordinates test"""
    full = PDBModel(T.testRoot() + '/com/1BGS.pdb')

    ## work on the model without the atoms flagged by maskH2O
    self.m = full.compress(N.logical_not(full.maskH2O()))

    ## attach a running number as atom profile 'test'
    self.m.atoms.set('test', range(len(self.m)))

    self.red = ReduceCoordinates(self.m, 4)
    self.mred = self.red.reduceToModel()

    if self.local:
        print('\nAtoms before reduction %i' % self.m.lenAtoms())
        print('Atoms After reduction %i' % self.mred.lenAtoms())

    self.assertEqual(self.mred.lenAtoms(), 445)
def test_ReduceCoordinates(self):
    """ReduceCoordinates test"""
    f_pdb = T.testRoot()+'/com/1BGS.pdb'
    self.m = PDBModel( f_pdb )

    ## remove the atoms flagged by maskH2O
    no_water = N.logical_not( self.m.maskH2O() )
    self.m = self.m.compress( no_water )

    self.m.atoms.set( 'test', range( len( self.m ) ) )

    ## reduce the model (second constructor argument: 4)
    self.red = ReduceCoordinates( self.m, 4 )
    self.mred = self.red.reduceToModel()

    if self.local:
        print( '\nAtoms before reduction %i'% self.m.lenAtoms() )
        print( 'Atoms After reduction %i'% self.mred.lenAtoms() )

    self.assertEqual( self.mred.lenAtoms(), 445 )
def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
    """Run a two-way ANOVA separately on each example.

    ma3d is indexed [example, level of factor A, replica]; the replicas on
    the last axis are split into consecutive groups of the sizes given in
    groupLens. Levels of factor A that produce an empty cell (a group
    without any unmasked value) are removed before the ANOVA: first those
    that are empty for all examples together, then, per example, those
    that are empty for that example.

    ma3d           -- 3-dimensional masked array
    groupLens      -- number of replicas in each group
    addInteraction -- if true, the interaction is tested as well
    repMeasuresOnA -- if true use Anova.AnovaRM12LR (repeated measures),
                      otherwise Anova.Anova2wayLR
    callback       -- called without arguments after each example

    Returns a Numeric array of p-values of shape (3, numExamples) with
    interaction, (2, numExamples) without.
    """
    groupLens = Numeric.asarray(groupLens)
    numGroups = groupLens.shape[0]

    # pick the ANOVA flavour: repeated / non-repeated measures
    if repMeasuresOnA:
        fAnova = Anova.AnovaRM12LR
    else:
        fAnova = Anova.Anova2wayLR

    # storage for the p-values, one row per tested effect
    if addInteraction:
        ps = Numeric.ones((3, ma3d.shape[0]), Numeric.Float)
    else:
        ps = Numeric.ones((2, ma3d.shape[0]), Numeric.Float)

    # start/end offsets of the replica groups on axis 2
    ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
    groupBounds = list(zip(ax2Ind[:-1], ax2Ind[1:]))

    # levels of A with a cell that is empty for all examples at once
    tInd2rem = []
    for aIdx in range(ma3d.shape[1]):
        for (i0, i1) in groupBounds:
            if Numeric.add.reduce(MA.count(ma3d[:, aIdx, i0:i1], 1)) == 0:
                tInd2rem.append(aIdx)
                break

    if len(tInd2rem) > 0:
        print("Warning: removing time indices %s for all genes" % (str(tInd2rem)))
        tInd2keep = [aIdx for aIdx in range(ma3d.shape[1]) if aIdx not in tInd2rem]
        ma3d = ma3d.take(tInd2keep, 1)

    for eIdx in range(ma3d.shape[0]):
        ma2d = ma3d[eIdx]

        # number of unmasked values per (level of A, group) cell
        cellCount = MA.zeros((ma2d.shape[0], numGroups), Numeric.Int)
        for g, (i0, i1) in enumerate(groupBounds):
            cellCount[:, g] = MA.count(ma2d[:, i0:i1], 1)

        # 1 where to take, 0 where not to take (some cell is empty)
        ma2dTakeInd = Numeric.logical_not(Numeric.add.reduce(Numeric.equal(cellCount, 0), 1))

        if Numeric.add.reduce(ma2dTakeInd) != ma2dTakeInd.shape[0]:
            removed = Numeric.compress(ma2dTakeInd == 0, Numeric.arange(ma2dTakeInd.shape[0]))
            print("Warning: removing time indices %s for gene %i" % (str(removed), eIdx))
            ma2d = MA.compress(ma2dTakeInd, ma2d, 0)

        an = fAnova(ma2d, groupLens, addInteraction, allowReductA=True, allowReductB=True)
        ps[:, eIdx] = an.ps
        callback()

    return ps
def test_outliers(self, traj=None):
    """EnsembleTraj.outliers/concat test"""
    ## double the trajectory by appending it to itself
    self.t2 = self.tr.concat( self.tr )

    ca_mask = self.tr.ref.maskCA()
    self.o = self.t2.outliers( z=1.2, mask=ca_mask, verbose=self.local )
    if self.local:
        print( self.o )

    ## keep only the members that are not flagged as outlier
    keep = N.logical_not( self.o )
    self.t = self.t2.compressMembers( keep )

    self.p2 = self.t.plotMemberProfiles( 'rms', xlabel='frame' )
    if self.local or self.VERBOSITY > 2:
        self.p2.show()

    self.assertEqual( self.o, 10 * [False] )
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative fitting),
    the known structure (reference), and the associated model inside the
    pdb_list. Calculates the different RMSD values and stores them in
    model.info under the keys::
      rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if - all atoms / CA, normal fit
      rmsd2ref_aa_if, rmsd2ref_ca_if       - all atoms / CA, after removal
                                             of the iterative-fit outliers
      rmsd2ref_aa_outliers, rmsd2ref_ca_outliers - fraction of all atoms /
                                             of CA atoms that are outliers

    @param fitted_model_if: iteratively fitted model; must carry the
                            atom profile 'rms_outliers'
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model whose info dictionary receives the results
    @type  model: PDBModel
    """
    ## first calculate rmsd for all atoms and CA without
    ## removing any residues from the model
    mask_CA_all = fitted_model_wo_if.maskCA()

    rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
    rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA_all, fit=1)

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## 1 for every atom that is NOT an outlier of the iterative fit
    outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers"))

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress(outliers_mask)
    reference = reference.compress(outliers_mask)

    mask_CA = fitted_model_if.maskCA()

    model.info["rmsd2ref_aa_if"] = fitted_model_if.rms(reference, fit=0)
    model.info["rmsd2ref_ca_if"] = fitted_model_if.rms(reference,
                                                       mask=mask_CA, fit=1)

    ## fraction of outliers among all atoms
    n_atoms = len(outliers_mask)
    model.info["rmsd2ref_aa_outliers"] = \
        1. * (n_atoms - N.sum(outliers_mask)) / n_atoms

    ## fraction of outliers among the CA atoms; this needs the CA mask of
    ## the uncompressed model -- mask_CA belongs to the compressed model
    ## and does not line up with the full-length outliers_mask
    n_ca = N.sum(mask_CA_all)
    n_ca_kept = N.sum(N.compress(mask_CA_all, outliers_mask))
    model.info["rmsd2ref_ca_outliers"] = 1. * (n_ca - n_ca_kept) / n_ca
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative fitting),
    the known structure (reference), and the associated model inside the
    pdb_list. Calculates the different RMSD values and sets the
    corresponding entries of model.info::
      rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if - all atoms / CA, normal fit
      rmsd2ref_aa_if, rmsd2ref_ca_if       - all atoms / CA, outliers of
                                             the iterative fit removed
      rmsd2ref_aa_outliers, rmsd2ref_ca_outliers - fraction of outliers
                                             among all atoms / CA atoms

    @param fitted_model_if: iteratively fitted model (with atom profile
                            'rms_outliers')
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model that receives the info entries
    @type  model: PDBModel
    """
    ## first calculate rmsd for all atoms and CA without
    ## removing any residues from the model
    full_mask_CA = fitted_model_wo_if.maskCA()
    rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
    rmsd_ca = fitted_model_wo_if.rms( reference, mask=full_mask_CA, fit=1 )

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## mask of the atoms that are no outliers
    outliers_mask = N.logical_not( fitted_model_if.profile("rms_outliers") )

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress( outliers_mask )
    reference = reference.compress( outliers_mask )

    mask_CA = fitted_model_if.maskCA()
    rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
    rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    model.info["rmsd2ref_aa_outliers"] = 1.*( len( outliers_mask ) \
                        - N.sum( outliers_mask ) ) / len( outliers_mask )

    ## The CA outlier fraction has to be taken from the full-length CA
    ## mask: mask_CA above describes the already compressed model and has
    ## a different length than outliers_mask.
    n_CA = N.sum( full_mask_CA )
    n_CA_kept = N.sum( N.compress( full_mask_CA, outliers_mask ) )
    model.info["rmsd2ref_ca_outliers"] = 1.*( n_CA - n_CA_kept ) / n_CA
def __setAll_1D( self, a ):
    """
    Overwrite the content of this sparse array with the values of a
    Numeric array or a list of numbers -- only for 1-dimensional arrays.
    Only the positions whose value differs from the default value are
    stored (as python lists in self.indices and self.values).

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    if type( a ) is list:
        a = N.array( a, self.__typecode )

    if self.shape != a.shape:
        raise SparseArrayError( 'dimensions not aligned' )

    ## positions holding a non-default value
    is_default = N.equal( a, self.__default )
    self.indices = N.nonzero( N.logical_not( is_default ) )
    self.indices = self.indices.tolist()

    ## the values at these positions
    self.values = N.take( a, self.indices )
    self.values = self.values.tolist()
def test_outliers(self, traj=None):
    """EnsembleTraj.outliers/concat test"""
    ## concatenate the trajectory with itself
    self.t2 = self.tr.concat(self.tr)

    self.o = self.t2.outliers(z=1.2,
                              mask=self.tr.ref.maskCA(),
                              verbose=self.local)
    if self.local:
        print(self.o)

    ## remove the outlier members, then plot the rms profile of the rest
    self.t = self.t2.compressMembers(N.logical_not(self.o))
    self.p2 = self.t.plotMemberProfiles('rms', xlabel='frame')

    show_plot = self.local or self.VERBOSITY > 2
    if show_plot:
        self.p2.show()

    self.assertEqual(self.o, 10 * [False])
def __setAll_1D(self, a):
    """
    Replace the content of this sparseArray with the values from a Numeric
    array or a list of numbers -- only for 1-dimensional arrays. Positions
    that hold the default value are not stored.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if a does not have the shape of this array
    """
    ## lists are converted with the typecode of this array
    if type(a) is list:
        a = N.array(a, self.__typecode)

    if self.shape != a.shape:
        raise SparseArrayError('dimensions not aligned')

    non_default = N.logical_not(N.equal(a, self.__default))

    self.indices = N.nonzero(non_default)
    self.indices = self.indices.tolist()

    self.values = N.take(a, self.indices)
    self.values = self.values.tolist()
def test_AmberParmMirror(self):
    """AmberParmBuilder.parmMirror test"""
    ref = self.ref

    ## remove atoms flagged by maskH2O; keep protein and Na+ ion
    self.mdry = ref.compress( N.logical_not( ref.maskH2O() ) )

    self.a = AmberParmBuilder( self.mdry,
                               verbose=self.local,
                               leap_out=self.leapout,
                               debug=self.DEBUG )

    ## build topology + coordinates and convert them back into a PDB
    self.a.parmMirror( f_out=self.dryparm, f_out_crd=self.drycrd )
    self.a.parm2pdb( self.dryparm, self.drycrd, self.drypdb )

    self.m1 = PDBModel( self.drypdb )
    self.m2 = PDBModel( self.refdry )

    ## the coordinates must be identical to the stored reference
    identical = N.array( self.m1.xyz == self.m2.xyz )
    self.assert_( identical.all() )
def test_AmberParmMirror(self):
    """AmberParmBuilder.parmMirror test"""
    ref = self.ref

    ## keep protein and Na+ ion
    no_water = N.logical_not(ref.maskH2O())
    self.mdry = ref.compress(no_water)

    self.a = AmberParmBuilder(self.mdry, verbose=self.local,
                              leap_out=self.leapout, debug=self.DEBUG)

    self.a.parmMirror(f_out=self.dryparm, f_out_crd=self.drycrd)
    self.a.parm2pdb(self.dryparm, self.drycrd, self.drypdb)

    ## compare the generated PDB with the reference file
    self.m1 = PDBModel(self.drypdb)
    self.m2 = PDBModel(self.refdry)

    eq = N.array(self.m1.xyz == self.m2.xyz)
    self.assert_(eq.all())
def test_Ramachandran(self):
    """Ramachandran test"""
    self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

    ## the masses serve as the atom profile that is passed on below
    self.traj.ref.atoms.set('mass', self.traj.ref.masses())

    ## two frames of the trajectory, reduced with maskProtein()
    self.mdl = [self.traj[0], self.traj[11]]
    self.mdl = [md.compress(md.maskProtein()) for md in self.mdl]

    self.rama = Ramachandran(self.mdl, name='test', profileName='mass',
                             verbose=self.local)
    self.psi = N.array(self.rama.psi)

    if self.local:
        self.rama.show()

    ## sum over all psi entries that are not None
    defined = N.logical_not(N.equal(self.psi, None))
    r = N.sum(N.compress(defined, self.psi))

    self.assertAlmostEqual(r, -11717.909796797909, 2)
def ttest_rsmplA(self, ma3d, callback):
    """Conduct a related-samples t-test on individual examples wrt factor A
    (variables, ma3d axis 1), comparing ma3d[e][0] with ma3d[e][1].

    Positions that are masked in either of the two samples are left out of
    both. An example with fewer than two remaining pairs, or for which
    scipy.stats.ttest_rel raises, gets p = 1.0 and a warning is printed.
    callback() is called without arguments once per example.

    Returns a Numeric array of p-values of shape (numExamples,).
    """
    ps = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)

    for eIdx in range(ma3d.shape[0]):
        a = ma3d[eIdx][0]
        b = ma3d[eIdx][1]

        # use only the positions where both samples hold a value
        cond = Numeric.logical_not(Numeric.logical_or(MA.getmaskarray(a), MA.getmaskarray(b)))
        a = Numeric.asarray(MA.compress(cond, a))
        b = Numeric.asarray(MA.compress(cond, b))

        if len(a) >= 2:
            try:
                ps[eIdx] = scipy.stats.ttest_rel(a, b)[1]
            except Exception as inst:
                print("Warning: %s" % str(inst))
                print("Example %i:\n%s\n%s\n" % (eIdx, str(a), str(b)))
                ps[eIdx] = 1.0
        else:
            print("Warning: removing example %i:\n%s\n%s\n" % (eIdx, str(a), str(b)))
            ps[eIdx] = 1.0

        callback()

    # the p-values were collected but never handed back to the caller
    return ps
def getBuriedSurfaceTriangles(self, atomIndices=None, component=0, selnum=1, negate=0):
    """vfloat, vint, tri = getBuriedSurfaceTriangles(atomIndices=None, component=0, selnum=1, negate=0)

    Return the triangles of the specified SES component for which at least
    'selnum' vertices are either buried (if negate=0) or not buried
    (if negate=1). 0 < selnum < 4.

    vfloat and vint hold the data for all vertices of the surface (as
    returned by getTriangles). tri contains the subset of the triangles
    that fulfil the condition.
    """
    assert selnum in (1,2,3)

    vfloat, vint, tri = self.getTriangles(atomIndices, component=component)

    # column 2 of the integer vertex data is used as the buried flag
    buriedFlag = vint[:,2]
    if negate:
        buriedFlag = Numeric.logical_not(buriedFlag)

    # Per-corner flag lookup. take() indexes the flag array directly;
    # choose() would interpret every single flag as a separate "choice",
    # which is not meant for index lookups into a long array.
    triBuried = Numeric.take(buriedFlag, tri[:,:3])

    # number of selected corners per triangle (do not shadow builtin sum)
    nSelected = Numeric.sum(triBuried, 1)

    faceInd = Numeric.nonzero( Numeric.greater_equal(nSelected, selnum) )
    faces = Numeric.take(tri, faceInd)
    return vfloat, vint, faces
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a)>2

    In each iteration median and standard deviation are calculated from
    the values that were not flagged in the previous iteration; every
    value of a whose distance to the median exceeds z standard deviations
    is flagged. The iteration stops when the number of outliers does not
    change any longer or when fewer than 3 values would remain.

    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int

    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert( len(a) > 0 )

    values = N.array( a )
    out = N.zeros( len(a) )

    if len(a) < 3:
        return out, N.median(a), N.std(a)

    for i in range( it ):

        b = N.compress( N.logical_not(out), values )
        me = N.median( b )
        sd = N.std( b )

        deviation = N.absolute( values - me )

        if sd == 0:
            ## all retained values are identical -- avoid the division by
            ## zero; everything that differs from them is an outlier
            o = deviation > 0
        else:
            o = deviation / sd > z    # pseudo z-score of each value

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def test_Trajectory(self):
    """Trajectory test"""
    ## the trajectory is loaded from traj.dat instead of being created
    ## from single PDB files
    f_traj = T.testRoot() + '/lig_pcr_00/traj.dat'
    self.traj = T.load(f_traj)

    ## sort frames after frameNames, then sort atoms
    self.traj.sortFrames()
    self.traj.sortAtoms()

    ## remove waters
    no_water = N.logical_not(self.traj.ref.maskH2O())
    self.traj = self.traj.compressAtoms(no_water)

    ## get fluctuation on a residue level (result is not checked)
    r1 = self.traj.getFluct_local(verbose=self.local)

    ## fit backbone of frames to reference structure
    self.traj.fit(ref=self.traj.ref,
                  mask=self.traj.ref.maskBB(),
                  verbose=self.local)

    rms_sum = N.sum(self.traj.profile('rms'))
    self.assertAlmostEqual(rms_sum, 58.101235746353879, 2)
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a)>2

    Median and standard deviation are re-calculated in each iteration
    from the values that were not flagged before. A value is flagged if
    its distance to the median is larger than z standard deviations.
    The refinement ends if the number of outliers stays the same or if
    fewer than 3 values would be left.

    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int

    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert (len(a) > 0)

    n = len(a)
    values = N.array(a)
    out = N.zeros(n)

    if n < 3:
        return out, N.median(a), N.std(a)

    for i in range(it):
        b = N.compress(N.logical_not(out), values)
        me = N.median(b)
        sd = N.std(b)

        distance = N.absolute(values - me)

        ## A zero SD means that all retained values are the same; dividing
        ## by it would fail or produce nan/inf. Any value that deviates at
        ## all is then an outlier.
        if sd == 0:
            o = distance > 0
        else:
            o = (distance / sd) > z  # pseudo z-score of each value

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > n - 3):
            return o, me, sd

        out = o

    return out, me, sd
def test_Trajectory(self):
    """Trajectory test"""
    ## Loading (from traj.dat rather than from a list of PDB files)
    self.traj = T.load( T.testRoot() + '/lig_pcr_00/traj.dat' )

    ## sort frames after frameNames
    self.traj.sortFrames()

    ## sort atoms
    self.traj.sortAtoms()

    ## remove waters
    water = self.traj.ref.maskH2O()
    self.traj = self.traj.compressAtoms( N.logical_not( water ) )

    ## get fluctuation on a residue level
    r1 = self.traj.getFluct_local( verbose=self.local )

    ## fit backbone of frames to reference structure
    bb_mask = self.traj.ref.maskBB()
    self.traj.fit( ref=self.traj.ref, mask=bb_mask, verbose=self.local )

    self.assertAlmostEqual( N.sum( self.traj.profile('rms') ),
                            58.101235746353879, 2 )
def histogram2d_2(data, bins, xrange=None, yrange=None):
    """
    Two-dimensional histogram, built as a stack of one-dimensional
    x-histograms -- one for each slice along y.

    data   -- list or array of shape (n,2): column 0 is x, column 1 is y
    bins   -- int (same number of bins for x and y) or list/tuple
              (x bins, y bins)
    xrange -- passed on as 'range' to histogram() for the x-histograms
    yrange -- (min, max) of y; default: minimum and maximum of data[:,1]

    Returns three arrays X, Y, N with one row per y-slice: column 0 of the
    x-histogram (presumably the bin positions), the center of the y-slice
    and column 1 of the x-histogram (presumably the number of points).

    Raises TypeError if data cannot be converted into a float array or if
    bins is neither int, list nor tuple; ValueError if data is not
    2-dimensional.
    """
    try:
        data = Numeric.array(data, Float)
    except Exception:
        ## the message has to be formatted here: a third expression in a
        ## raise statement is the traceback, not a format argument
        raise TypeError('data: list or array expected, %s given'
                        % str(type(data)))

    if not len(shape(data)) == 2:
        raise ValueError('shape of data array must be (n,2)')

    if isinstance(bins, int):
        bins = (bins, bins)
    elif not isinstance(bins, (list, tuple)):
        raise TypeError('bins: int, list or tuple expected. %s given'
                        % str(type(bins)))

    if yrange is None:
        yrange = (min(data[:, 1]), max(data[:, 1]))

    x_min = min(data[:, 0])
    x_max = max(data[:, 0])
    x_spacing = (x_max - x_min) / bins[0]
    ystep = abs(yrange[1] - yrange[0]) / float(bins[1])

    X = []
    Y = []
    counts = []

    for y in arange(yrange[0] + ystep, yrange[1] + ystep, ystep):

        ## collect the remaining values with y-coordinate <= y; lower
        ## values were already discarded at the end of earlier rounds
        mask = less_equal(data[:, 1], y)
        rows = compress(mask, data, 0)

        ## create histogram for x-dimension
        if shape(rows[:, 0])[0]:
            x_histogram = histogram(rows[:, 0], bins[0], range=xrange)
        else:
            ## empty slice: bin centers with a count of zero
            x_bins = arange(x_min + x_spacing / 2., x_max + x_spacing / 2.,
                            x_spacing)

            ## no. of x_bins might be larger than it should be
            ## (due to numerical errors).
            if shape(x_bins)[0] - 1 == bins[0]:
                x_bins = x_bins[:-1]

            x_histogram = Numeric.concatenate(
                (x_bins[:, NewAxis], zeros((bins[0], 1))), 1)

        ## append #point per cell (x_i, y_i, n_i)
        X.append(x_histogram[:, 0])
        counts.append(x_histogram[:, 1])
        Y.append(ones(shape(x_histogram)[0]) * (y - ystep / 2.))

        ## discard processed data
        data = Numeric.compress(Numeric.logical_not(mask), data, 0)

    return Numeric.array(X), Numeric.array(Y), Numeric.array(counts)
def makeMap( self, maxPerCenter=4 ):
    """
    Calculate mapping between complete and reduced atom list.
    Creates a (list of lists of int, list of atom dictionaries)
    containing groups of atom indices into original model, new center
    atoms. The result is stored in self.groups and self.atoms.

    Every residue gets one backbone center ('BB'). Residues other than GLY
    and ALA additionally get side chain centers 'SC0', 'SC1', .. for the
    atom groups returned by self.group().

    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    resIndex = self.m_sorted.resIndex()
    resModels= self.m_sorted.resModels()
    m = self.m_sorted

    self.currentAtom = 0

    groups = []
    atoms = DictList()

    n_res = len( resIndex )

    for i_res, first_atom in enumerate( resIndex ):

        ## the residue ends where the next one starts
        if i_res < n_res - 1:
            last_atom = resIndex[ i_res+1 ] - 1
        else:
            last_atom = len( self.a_indices ) - 1

        a = m.atoms[ first_atom ]

        ## position of this residue's atoms in original PDBModel (unsorted)
        a_indices = self.a_indices[ first_atom : last_atom+1 ]

        ## for each center create list of atom indices and a center atom
        if a['residue_name'] != 'GLY' and a['residue_name'] != 'ALA':

            bb_mask = resModels[ i_res ].maskBB()
            bb_a_indices = N.compress( bb_mask, a_indices )
            sc_a_indices = N.compress( N.logical_not( bb_mask ), a_indices )
            sc_groups = self.group( sc_a_indices, maxPerCenter )

        else:
            bb_a_indices = a_indices
            sc_groups = []

        groups += [ bb_a_indices ]
        atoms += [ self.nextAtom( a, 'BB' ) ]

        ## separate counter -- must not overwrite the residue index
        for i_sc, g in enumerate( sc_groups ):
            groups += [ g ]
            atoms += [ self.nextAtom( a, 'SC%i'%i_sc ) ]

    self.groups = groups
    self.atoms = atoms
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing
    outliers. Superimposed array y would be
    C{ N.dot(y, N.transpose(r)) + t }.

    @param x: first array of coordinates
    @type  x: array
    @param y: second array of coordinates (same length as x)
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    ## 1 for every row that still takes part in the fit
    mask = N.ones(len(y), N.int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances (0 for rows that are masked out)
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        d_used = N.compress(mask, d)
        rmsd = N.sqrt(N.average(d_used**2))
        stdv = MU.SD(d_used)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        if stdv == 0:
            ## all distances are identical: there is no spread that could
            ## still change, and dividing by it is not possible
            d_stdv = 0.
        else:
            d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows; rows exactly at the threshold are
        ## kept, otherwise identical distances (stdv 0) would discard all
        mask = N.logical_and(mask, N.less_equal(d, rmsd + z * stdv))
        outliers = N.nonzero(N.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def __init__( self, model, probe, vdw_set=1, mode=3, mask=None, **kw ):
    """
    SurfaceRacer creates three output files::
      result.txt - contains breakdown of surface areas and is written
                   to the directory where the program resides. This
                   file is discarded here.
      <file>.txt - contains the accessible, molecular surface areas
                   and average curvature information parsed here.
                   The filename is that of the input pdb file but
                   with a .txt extension.
      <file>_residue.txt - new in version 5.0 and not used by this
                   wrapper
      stdout     - some general information about the calculation.
                   Redirected to /dev/null

    @param model: model to analyze (a compressed clone is stored)
    @type  model: PDBModel
    @param probe: probe radii, Angstrom
    @type  probe: float
    @param vdw_set: Van der Waals radii set (default: 1)::
                      1 - Richards (1977)
                      2 - Chothia  (1976)
    @type  vdw_set: 1|2
    @param mode: calculation mode (default: 3)::
                   1- Accessible surface area only
                   2- Accessible and molecular surface areas
                   3- Accessible, molecular surface areas and
                      average curvature
    @type  mode: 1|2|3
    @param mask: optional atom mask to apply before calling surface
                 racer (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                      (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT)
                      (default: None)
    """
    Executor.__init__( self, 'surfaceracer', template=self.inp, **kw )

    self.model = model.clone()

    ## default selection: heavy atoms that are not solvent
    if mask is not None:
        self.mask = mask
    else:
        self.mask = model.maskHeavy() * \
                    N.logical_not( model.maskSolvent() )

    self.model = self.model.compress( self.mask )

    ## will be filled in by self.prepare() after the temp folder is ready
    self.f_pdb = None
    self.f_pdb_name = None
    self.f_out_name = None

    ## parameters that can be changed
    self.probe = probe
    self.vdw_set = vdw_set
    self.mode = mode

    ## random data dictionaries
    self.ranMS = SRT.ranMS
    self.ranAS = SRT.ranAS
    self.ranMS_Nter = SRT.ranMS_N
    self.ranAS_Nter = SRT.ranAS_N
    self.ranMS_Cter = SRT.ranMS_C
    self.ranAS_Cter = SRT.ranAS_C

    ## count failures
    self.i_failed = 0
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing
    outliers. Superimposed array y would be
    C{ N.dot(y, N.transpose(r)) + t }.

    @param x: first coordinate array
    @type  x: array
    @param y: second coordinate array (same length as x)
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N.ones(len(y), N.int32 )

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N.sqrt(N.average(N.compress(mask, d)**2))
        stdv = MU.SD(N.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        ## A spread of exactly 0 (all distances equal) cannot be used as
        ## divisor; it also cannot shrink any further, so the stdv
        ## criterion is treated as fulfilled.
        if stdv != 0:
            d_stdv = abs(1 - stdv_old / stdv)
        else:
            d_stdv = 0.

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows. less_equal instead of less: with
        ## identical distances every row sits exactly on the threshold
        ## and none of them must be removed.
        mask = N.logical_and(mask, N.less_equal(d, rmsd + z * stdv))
        outliers = N.nonzero( N.logical_not( mask ) )
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def conservationScore( self, cons_type='cons_ent', ranNr=150,
                       log=StdLog(), verbose=1 ):
    """
    Score of conserved residue pairs in the interaction surface.
    Optionally, normalized by random surface contacts.

    @param cons_type: precalculated conservation profile name,
                      see L{Biskit.PDBDope}.
    @type  cons_type: str
    @param ranNr: number of random matrices to use (default: 150);
                  0 .. normalize by the number of contacts instead
    @type  ranNr: int
    @param log: log file [STDOUT]
    @type  log: Biskit.LogFile
    @param verbose: give progress report [1]
    @type  verbose: bool | int

    @return: conservation score
    @rtype: float
    """
    ## conservation profiles; fall back to 1 for every residue
    try:
        recCons = self.rec().profile( cons_type, updateMissing=1 )
    except Exception:
        if verbose:
            log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                    '*'*30+'\n')
        recCons = N.ones( self.rec().lenResidues() )

    try:
        ligCons = self.lig().profile( cons_type, updateMissing=1 )
    except Exception:
        if verbose:
            log.add(\
                '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
        ligCons = N.ones( self.lig().lenResidues() )

    ## surface masks; calculate them if they are not yet available
    ## NOTE(review): the truth test of an array-valued profile may be
    ## ambiguous, and profile() may raise for a missing profile
    ## -- confirm against PDBModel.profile
    if self.rec().profile( 'surfMask' ):
        recSurf = self.rec().profile( 'surfMask' )
    else:
        d = PDBDope(self.rec())
        d.addSurfaceMask()
        ## fetch the fresh mask (recSurf used to stay undefined here)
        recSurf = self.rec().profile( 'surfMask' )

    if self.lig().profile( 'surfMask' ):
        ligSurf = self.lig().profile( 'surfMask' )
    else:
        d = PDBDope(self.lig())
        d.addSurfaceMask()
        ligSurf = self.lig().profile( 'surfMask' )

    surfMask = N.ravel(N.outerproduct( recSurf, ligSurf ))

    ## residue pairs where both conservation values are 0 are ignored
    missing = N.outerproduct( N.equal( recCons, 0), N.equal(ligCons,0))

    cont = self.resContacts() * N.logical_not(missing)

    consMat = N.outerproduct( recCons, ligCons )

    score = cont* consMat

    # get a random score
    if ranNr != 0:
        ## report progress via the parameters of this method
        if verbose:
            log.write('.')
        ranMat = mathUtils.random2DArray( cont, ranNr, mask=surfMask )
        random_score = N.sum(N.sum( ranMat * consMat ))/( ranNr*1.0 )
        return N.sum(N.sum(score))/random_score

    else:
        return N.sum(N.sum(score))/ N.sum(N.sum(cont))
def makeMap(self, maxPerCenter=4):
    """
    Calculate mapping between complete and reduced atom list.
    Creates a (list of lists of int, list of atom dictionaries)
    containing groups of atom indices into original model, new center
    atoms. Both lists are stored in self.groups and self.atoms.

    Each residue contributes a backbone center ('BB'); all residues except
    GLY and ALA additionally contribute one side chain center ('SC0',
    'SC1', ..) per atom group returned by self.group().

    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    m = self.m_sorted
    resIndex = m.resIndex()
    resModels = m.resModels()

    self.currentAtom = 0

    groups = []
    atoms = DictList()

    for i in range(len(resIndex)):

        first_atom = resIndex[i]

        if i < len(resIndex) - 1:
            last_atom = resIndex[i + 1] - 1
        else:
            last_atom = len(self.a_indices) - 1

        a = m.atoms[first_atom]

        ## position of this residue's atoms in original PDBModel (unsorted)
        a_indices = self.a_indices[first_atom:last_atom + 1]

        ## for each center create list of atom indices and a center atom
        if a['residue_name'] != 'GLY' and a['residue_name'] != 'ALA':
            is_bb = resModels[i].maskBB()
            bb_a_indices = N.compress(is_bb, a_indices)
            sc_a_indices = N.compress(N.logical_not(is_bb), a_indices)
            sc_groups = self.group(sc_a_indices, maxPerCenter)
        else:
            bb_a_indices = a_indices
            sc_groups = []

        groups += [bb_a_indices]
        atoms += [self.nextAtom(a, 'BB')]

        ## number the side chain centers with their own counter instead
        ## of re-using (and overwriting) the residue loop variable
        for sc_number, g in enumerate(sc_groups):
            groups += [g]
            atoms += [self.nextAtom(a, 'SC%i' % sc_number)]

    self.groups = groups
    self.atoms = atoms
def histogram2d_2(data, bins, xrange = None, yrange = None):
    """
    Two-dimensional histogram assembled from one x-histogram per y-slice.

    The y-range is cut into bins[1] slices of equal height. For each slice
    the rows with a y-value up to the upper edge of the slice are
    histogrammed along x and then removed from the data, so every row is
    counted in at most one slice (rows below yrange[0] fall into the
    first slice).

    @param data: rows of (x, y) values
    @type  data: list or array of shape (n, 2)
    @param bins: number of bins; a single int is used for both dimensions,
                 otherwise (x_bins, y_bins)
    @type  bins: int | list | tuple
    @param xrange: x-range handed on to histogram() (default: None)
    @type  xrange: (float, float)
    @param yrange: y-range to be binned (default: min and max of the data)
    @type  yrange: (float, float)

    @return: X, Y, N -- x-position, y-position (center of the slice) and
             number of points, one row per y-slice
    @rtype: array, array, array

    @raise TypeError: if data cannot be converted into a float array or
                      bins is neither int, list nor tuple
    @raise ValueError: if data is not two-dimensional
    """
    try:
        data = Numeric.array(data, Float)
    except Exception:
        ## the message has to be formatted with %; "raise E, msg, arg"
        ## would pass the argument as traceback
        raise TypeError('data: list or array expected, %s given'
                        % str(type(data)))

    if len(shape(data)) != 2:
        raise ValueError('shape of data array must be (n,2)')

    if isinstance(bins, int):
        bins = (bins, bins)
    elif not isinstance(bins, (list, tuple)):
        raise TypeError('bins: int, list or tuple expected. %s given'
                        % str(type(bins)))

    if yrange is None:
        yrange = (min(data[:,1]), max(data[:,1]))

    x_min = min(data[:,0])
    x_max = max(data[:,0])
    x_spacing = (x_max - x_min) / bins[0]

    ystep = abs(yrange[1] - yrange[0]) / float(bins[1])

    X = []
    Y = []
    counts = []

    ## y runs over the upper edges of the slices
    for y in arange(yrange[0] + ystep, yrange[1] + ystep, ystep):

        ## collect the remaining rows with a y-value up to this edge
        mask = less_equal(data[:,1], y)
        rows = compress(mask, data, 0)

        ## create histogram for x-dimension
        ## NOTE(review): histogram() presumably returns an (n_bins, 2)
        ## array of (position, count) -- confirm against its definition
        if shape(rows[:,0])[0]:
            x_histogram = histogram(rows[:,0], bins[0], range = xrange)
        else:
            ## empty slice: build the bin positions by hand, all counts 0
            x_bins = arange(x_min + x_spacing / 2., x_max + x_spacing / 2.,
                            x_spacing)

            ## no. of x_bins might be larger than it should be
            ## (due to numerical errors).
            if shape(x_bins)[0] - 1 == bins[0]:
                x_bins = x_bins[:-1]

            x_histogram = Numeric.concatenate((x_bins[:,NewAxis],
                                               zeros((bins[0],1))), 1)

        ## append #points per cell (x_i, y_i, n_i)
        X.append(x_histogram[:,0])
        counts.append(x_histogram[:,1])
        Y.append(ones(shape(x_histogram)[0]) * (y - ystep / 2.))

        ## discard processed data
        data = Numeric.compress(Numeric.logical_not(mask), data, 0)

    return Numeric.array(X), Numeric.array(Y), Numeric.array(counts)
def __init__(self, model, probe, vdw_set=1, mode=3, mask=None, **kw):
    """
    Prepare a SurfaceRacer run on a (masked) copy of the given model.

    SurfaceRacer creates three output files::
      result.txt - breakdown of surface areas, written to the directory
                   where the program resides. This file is discarded here.
      <file>.txt - the accessible and molecular surface areas and average
                   curvature information parsed here. The file name is
                   that of the input pdb file but with a .txt extension.
      <file>_residue.txt - new in version 5.0, not used by this wrapper
      stdout     - some general information about the calculation.
                   Redirected to /dev/null

    @param model: model to analyze
    @type  model: PDBModel
    @param probe: probe radius, Angstrom
    @type  probe: float
    @param vdw_set: Van der Waals radii set (default: 1)::
                      1 - Richards (1977)
                      2 - Chothia  (1976)
    @type  vdw_set: 1|2
    @param mode: calculation mode (default: 3)::
                   1 - Accessible surface area only
                   2 - Accessible and molecular surface areas
                   3 - Accessible, molecular surface areas and
                       average curvature
    @type  mode: 1|2|3
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                 (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STDOUT)
                 (default: None)
    """
    Executor.__init__(self, 'surfaceracer', template=self.inp, **kw)

    ## work on a private copy, reduced to the atoms selected by the mask
    model_copy = model.clone()
    if mask is None:
        mask = model.maskHeavy() * N.logical_not(model.maskSolvent())
    self.mask = mask
    self.model = model_copy.compress(mask)

    ## parameters that can be changed
    self.probe = probe
    self.vdw_set = vdw_set
    self.mode = mode

    ## will be filled in by self.prepare() after the temp folder is ready
    self.f_pdb = None
    self.f_pdb_name = None
    self.f_out_name = None

    ## random data dictionaries
    self.ranMS, self.ranAS = SRT.ranMS, SRT.ranAS
    self.ranMS_Nter, self.ranAS_Nter = SRT.ranMS_N, SRT.ranAS_N
    self.ranMS_Cter, self.ranAS_Cter = SRT.ranMS_C, SRT.ranAS_C

    ## count failures
    self.i_failed = 0
def loadTraj(self, fname, shift=0):
    """
    Load a single trajectory and reduce it according to the settings of
    this instance: frame range, stepping, random thinning, allowed atom
    names, solvent removal, hydrogen removal (in this order).

    @param fname: name of the trajectory file, handed to load_locked()
    @type  fname: str
    @param shift: not used by this method
    @type  shift: int

    @return: the processed trajectory
    """
    if self.verbose:
        self.log.write('Loading %s...' % fname)

    traj = self.load_locked(fname)

    if self.verbose:
        self.log.add('Processing trajectory...')

    ## convert single member frame index into all member frame index
    if self.sstart or self.sstop:
        if isinstance(traj, EnsembleTraj):
            n_members = traj.n_members
            self.start = (self.sstart or 0) * n_members
            self.stop = (self.sstop or 0) * n_members
        else:
            self.start, self.stop = self.sstart, self.sstop
            if self.verbose:
                self.log.add('Warning: I am using -ss -se instead of -s -e')

    ## remove unwanted frames
    if self.start or self.stop:
        first = self.start
        last = self.stop or len(traj)
        traj = traj[first:last]

    ## stepping (offset)
    if self.step > 1:
        traj = traj.thin(self.step)

    ## thin with random stepping, use same frames from all trajectories
    if self.thin:
        targetLength = int(round(len(traj) * self.thin))
        if not self.thin_i:
            self.thin_i = MU.randomRange(0, len( traj ), targetLength )
        traj = traj.takeFrames(self.thin_i)

        if self.verbose:
            self.log.add("Thinned to %i frames." % len(traj))

    ## keep only allowed atoms (default: all)
    if self.atoms:
        traj.ref.addChainId()

        def is_allowed(a, ok=self.atoms):
            return a['name'] in ok

        aMask = traj.ref.mask(is_allowed)
        unwanted = N.nonzero(N.logical_not(aMask))
        traj.removeAtoms(unwanted)

    ## get rid of non-standard atoms, water, ions, etc.
    if not self.solvent:
        n_before = traj.lenAtoms()
        keep = N.logical_not(traj.ref.maskSolvent())
        traj = traj.compressAtoms(keep)

        if self.verbose:
            self.log.add('%i solvent/ion atoms deleted.'
                         % (n_before - traj.lenAtoms()))

    ## delete hydrogens, if requested
    if self.heavy:
        n_before = traj.lenAtoms()
        traj = traj.compressAtoms(traj.ref.maskHeavy())

        if self.verbose:
            self.log.add('%i hydrogens deleted.'
                         % (n_before - traj.lenAtoms()))

    return traj
def deNAN(a, value=0.0):
    """
    Replace every entry of a that is neither < 0 nor >= 0 by value.
    Entries failing both comparisons are taken to be NaN (a NaN compares
    false against everything); all other entries are returned unchanged.

    @param a: input array
    @param value: replacement for the flagged entries (default: 0.0)

    @return: result of Numeric.where, with flagged entries set to value
    """
    below_zero = Numeric.less(a, 0.0)
    zero_or_above = Numeric.greater_equal(a, 0.0)

    ## the sum acts as an element-wise OR of the two comparisons
    comparable = below_zero + zero_or_above
    nans = Numeric.logical_not(comparable)

    return Numeric.where(nans, value, a)
def addSurfaceRacer( self, probe=1.4, vdw_set=1, probe_suffix=0, mask=None ):
    """
    Run SurfaceRacer on the model and store its results as atom profiles.
    Always adds three profiles::

       curvature - average curvature (or curvature_1.4 if probe_suffix=1)
       MS - molecular surface area   (or MS_1.4 if probe_suffix=1)
       AS - accessible surface area  (or AS_1.4 if probe_suffix=1)

    If the probe radius is 1.4 Angstrom, the Richards vdw radii set is
    used and the result contains 'relAS', the following two profiles are
    also added::

       relAS - Relative solvent accessible surface
       relMS - Relative molecular surface

    See L{Biskit.SurfaceRacer}

    @param probe: probe radius
    @type  probe: float
    @param vdw_set: defines what vdw-set to use (1-Richards, 2-Chothia)
    @type  vdw_set: 1|2
    @param probe_suffix: append probe radius to profile names
    @type  probe_suffix: 1|0
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @raise ExeConfigError: if external application is missing
    """
    ## empty string unless probe_suffix is set
    suffix = probe_suffix * ('_%3.1f' % probe)

    ## hydrogens + waters are not allowed during the surface calculation
    if mask is None:
        mask = self.m.maskHeavy() * N.logical_not( self.m.maskSolvent() )

    fs = SurfaceRacer( self.m, probe, vdw_set=vdw_set, mask=mask )
    fs_dic = fs.run()
    fs_info = fs_dic['surfaceRacerInfo']

    ## (profile name, key in result dictionary, comment)
    profiles = [
        ( 'MS' + suffix, 'MS', 'Molecular Surface area in A' ),
        ( 'AS' + suffix, 'AS', 'Accessible Surface area in A' ),
        ( 'curvature' + suffix, 'curvature', 'Average curvature' ),
        ]

    ## relative surfaces only for the standard probe and radii set
    if round(probe, 1) == 1.4 and vdw_set == 1 and 'relAS' in fs_dic:
        profiles += [
            ( 'relAS', 'relAS', 'Relative solvent accessible surf.' ),
            ( 'relMS', 'relMS', 'Relative molecular surf.' ),
            ]

    for name, key, comment in profiles:
        self.m.atoms.set( name, fs_dic[key], mask, 0,
                          comment=comment,
                          version= T.dateString() + ' ' + self.version(),
                          **fs_info )
# equal to the limiting magnitude nondetected = Numeric.less_equal( flux[i, :], 0.0) * Numeric.greater(fluxerr[i, :], 0.0) # Those objects with error flux and flux equal to 0 are assigned a magnitude of -99 # and a flux of 0, which is interpreted by SExtractor as a non-observed object nonobserved = Numeric.less_equal(fluxerr[i, :], 0.0) # When flux error > 100*(flux), mark as nonobserved (Benitez, 24-Oct-03). nonobserved = Numeric.where( fluxerr[i, :] > 100 * (abs(flux[i, :])), 1.0, nonobserved[:]) detected = Numeric.logical_not(nonobserved + nondetected) # Get the zero point for the final magnitudes zpoint = fUtil.zeroPoint( fitsfile) # pass the fits file to zeroPoint func pardict["Zeropoint"] = str(zpoint) pardict["Zeropoint_Error"] = zpoint_err self.logfile.write("Photometric ZeroPoint of " + os.path.basename(fitsfile) + ": " + str(zpoint)) #---------------- Temporary zero point correction --------------------# ## Commented 24-Sep-2002 as per Bugzilla bug #1800 ## ## zpointCor=fUtil.zeroPointCorrection(imfilter) ## zpoint+= zpointCor
def loadTraj( self, fname, shift=0 ):
    """
    Load a single trajectory and reduce it according to the settings of
    this instance: frame range, stepping, random thinning, allowed atom
    names, solvent removal, hydrogen removal (in this order).

    @param fname: name of the trajectory file, handed to load_locked()
    @type  fname: str
    @param shift: not used by this method
    @type  shift: int

    @return: the processed trajectory
    """
    if self.verbose:
        self.log.write( 'Loading %s...' % fname)

    traj = self.load_locked( fname )

    if self.verbose:
        self.log.add( 'Processing trajectory...')

    ## convert single member frame index into all member frame index
    if self.sstart or self.sstop:
        if isinstance( traj, EnsembleTraj ):
            n_members = traj.n_members
            self.start = (self.sstart or 0) * n_members
            self.stop = (self.sstop or 0) * n_members
        else:
            self.start, self.stop = self.sstart, self.sstop
            if self.verbose:
                self.log.add('Warning: I am using -ss -se instead of -s -e')

    ## remove unwanted frames
    if self.start or self.stop:
        first = self.start
        last = self.stop or len( traj )
        traj = traj[ first : last ]

    ## stepping (offset)
    if self.step > 1:
        traj = traj.thin( self.step )

    ## thin with random stepping, use same frames from all trajectories
    if self.thin:
        targetLength = int( round( len( traj ) * self.thin ) )
        if not self.thin_i:
            self.thin_i = MU.randomRange(0, len( traj ), targetLength )
        traj = traj.takeFrames( self.thin_i )

        if self.verbose:
            self.log.add( "Thinned to %i frames." % len( traj ) )

    ## keep only allowed atoms (default: all)
    if self.atoms:
        traj.ref.addChainId()

        def is_allowed( a, ok=self.atoms ):
            return a['name'] in ok

        aMask = traj.ref.mask( is_allowed )
        unwanted = N.nonzero( N.logical_not( aMask ) )
        traj.removeAtoms( unwanted )

    ## get rid of non-standard atoms, water, ions, etc.
    if not self.solvent:
        n_before = traj.lenAtoms()
        keep = N.logical_not( traj.ref.maskSolvent() )
        traj = traj.compressAtoms( keep )

        if self.verbose:
            self.log.add('%i solvent/ion atoms deleted.'
                         % (n_before - traj.lenAtoms()))

    ## delete hydrogens, if requested
    if self.heavy:
        n_before = traj.lenAtoms()
        traj = traj.compressAtoms( traj.ref.maskHeavy() )

        if self.verbose:
            self.log.add('%i hydrogens deleted.'
                         % (n_before - traj.lenAtoms()))

    return traj