def calcReducedContacts(self, soln, c):
    """
    Get contact matrices and/or fnarc from reduced-atom models.

    Returns without doing anything if no reduced receptor / ligand models
    are available or if C{self.requested} reports neither 'c_ratom_10' nor
    'fnarc_10' for this complex. Ordinary errors during the calculation are
    handed to C{self.reportError} instead of being raised.

    @param soln: solution number (only used for the error report)
    @type  soln: int
    @param c: Complex; results are stored in it under 'c_ratom_10' and
              'fnarc_10'
    @type  c: Complex
    """
    if not (self.reduced_recs and self.reduced_ligs):
        return

    if not self.requested(c, 'c_ratom_10', 'fnarc_10'):
        return

    try:
        ## create Complex with same orientation but reduced coordinates
        red_rec = self.reduced_recs[c.rec_model.source]
        red_lig = self.reduced_ligs[c.lig_model.source]
        red_com = Complex(red_rec, red_lig, c.ligandMatrix)

        contacts = red_com.atomContacts(10.0, cache=1)

        if self.requested(c, 'c_ratom_10'):
            c['c_ratom_10'] = MU.packBinaryMatrix(contacts)

        if self.c_ref_ratom_10 is not None:
            ## fraction of the reference contacts that is also found here
            ref = N.ravel(self.c_ref_ratom_10)
            c['fnarc_10'] = N.sum(N.ravel(contacts) * ref) \
                            / float(N.sum(ref))

    except Exception:
        ## best effort: report and go on. KeyboardInterrupt and SystemExit
        ## are deliberately NOT caught any longer.
        self.reportError('reduced contacts error', soln)
def linfit(x, y):
    """
    Fit a straight line through the points (x, y) by linear least squares.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    @param x: x-data
    @type  x: [ float ]
    @param y: y-data
    @type  y: [ float ]

    @return: m, n, r^2 (slope, intersection, corr. coefficient)
    @rtype: float, float, float

    @raise Exception: if x and y have different number of elements
    """
    xs = N.array(x, N.Float64)
    ys = N.array(y, N.Float64)

    count = len(xs)
    if count != len(ys):
        raise Exception('linfit: x and y must have same length')

    mean_x = N.average(xs)
    mean_y = N.average(ys)

    ## sums of squares / cross products around the means
    ss_xy = N.sum(xs * ys) - count * mean_x * mean_y
    ss_xx = N.sum(xs * xs) - count * mean_x * mean_x
    ss_yy = N.sum(ys * ys) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    intercept = mean_y - slope * mean_x
    r_squared = ss_xy**2 / (ss_xx * ss_yy)

    return slope, intercept, r_squared
def calcReducedContacts( self, soln, c ):
    """
    Get contact matrices and/or fnarc from reduced-atom models.

    Nothing is calculated if reduced receptor / ligand models are missing
    or if C{self.requested} reports neither 'c_ratom_10' nor 'fnarc_10'
    for this complex. Ordinary errors are passed on to
    C{self.reportError} rather than raised.

    @param soln: solution number (only used for the error report)
    @type  soln: int
    @param c: Complex; receives the values 'c_ratom_10' and 'fnarc_10'
    @type  c: Complex
    """
    if not (self.reduced_recs and self.reduced_ligs):
        return

    if not self.requested( c, 'c_ratom_10', 'fnarc_10' ):
        return

    try:
        ## create Complex with same orientation but reduced coordinates
        red_rec = self.reduced_recs[ c.rec_model.source ]
        red_lig = self.reduced_ligs[ c.lig_model.source ]
        red_com = Complex( red_rec, red_lig, c.ligandMatrix )

        contacts = red_com.atomContacts( 10.0, cache=1 )

        if self.requested( c, 'c_ratom_10' ):
            c['c_ratom_10'] = MU.packBinaryMatrix( contacts )

        if self.c_ref_ratom_10 is not None:
            ## fraction of the reference contacts that is also found here
            ref = N.ravel( self.c_ref_ratom_10 )
            c['fnarc_10'] = N.sum( N.ravel( contacts ) * ref ) \
                            / float( N.sum( ref ) )

    except Exception:
        ## best effort: report and go on. KeyboardInterrupt and SystemExit
        ## are deliberately NOT caught any longer.
        self.reportError( 'reduced contacts error', soln )
def _checkOrth(self, T, TT, eps=0.0001, output=False):
    """check if the basis is orthogonal on a set of points x:
    TT == T*transpose(T) == c*Identity

    INPUT:
        T: matrix of values of polynomials calculated at common
           reference points (x)
        TT = T * transpose(T)
        eps: max numeric error (upper bound for the sum of the absolute
             off-diagonal elements of TT)
        output: if true, print a report and signal failure by returning
                False; if false, failure raises an exception

    OUTPUT:
        True if orthogonal; False if not orthogonal and output is true

    RAISES:
        ValueError (carrying the report text) if the basis is not
        orthogonal and output is false
    """
    # TTd0 = TT with 0s on the main diagonal
    TTd0 = (-1. * Numeric.identity(Numeric.shape(TT)[0]) + 1) * TT
    s = Numeric.sum(Numeric.sum(Numeric.absolute(TTd0)))
    minT = MLab.min(MLab.min(T))
    maxT = MLab.max(MLab.max(T))
    minTTd0 = MLab.min(MLab.min(TTd0))
    maxTTd0 = MLab.max(MLab.max(TTd0))
    if not s < eps:
        out = "NOT ORTHOG, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
        out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (
            minTTd0, maxTTd0, s)
        if output:
            print(out)
            return False
        # string exceptions ("raise out") are a TypeError since Python 2.6;
        # wrap the report in a real exception instead
        raise ValueError(out)
    elif output:
        out = "ORTHOGONAL, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
        out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (
            minTTd0, maxTTd0, s)
        print(out)
    return True
def contactResDistribution( self, cm=None ):
    """
    Count occurrence of residues in protein-protein interface.

    @param cm: pre-calculated contact matrix; if None, the result of
               C{self.resContacts()} is used (default: None)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for every letter
             returned by C{molUtils.allAA()}
    @rtype: dict
    """
    ## identity test: "cm == None" would be evaluated element-wise if cm
    ## is an array
    if cm is None:
        cm = self.resContacts()

    ## get mask for residues involved in contacts
    maskLig = N.sum( cm )
    maskRec = N.sum( N.transpose( cm ))

    ## get sequence of contact residues only
    seqLig = N.compress( maskLig, self.lig().sequence() )
    seqRec = N.compress( maskRec, self.rec().sequence() )
    seq    = ''.join( seqLig ) + ''.join( seqRec ) ## convert back to string

    ## count occurrence of letters
    result = {}
    for aa in molUtils.allAA():
        result[aa] = seq.count( aa )

    return result
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    A distance matrix stored in C{self.pw_dist} is re-used if its shape
    fits the two masks; otherwise the distances are calculated anew.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix in C{self.pw_dist}
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    ## coordinates of all receptor and ligand atoms
    xyz_rec = self.rec().getXyz()
    xyz_lig = self.lig().getXyz()

    ## cached pair-wise distances -> atoms_rec x atoms_lig
    pw = getattr(self, 'pw_dist', None)

    outdated = pw is None
    if not outdated:
        outdated = N.shape(pw) != (N.sum(rec_mask), N.sum(lig_mask))

    if outdated:
        masked_rec = N.compress(rec_mask, xyz_rec, 0)
        masked_lig = N.compress(lig_mask, xyz_lig, 0)
        pw = self.__pairwiseDistances(masked_rec, masked_lig)

    if cache:
        self.pw_dist = pw

    ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N.less(pw, cutoff)
def __findTransformation(self, x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.
    Back transformation:
    for atom i new coordinates will be::
        y_new[i] = N.dot(r, y[i]) + t

    for all atoms in one step::
        y_new = N.dot(y, N.transpose(r)) + t

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.oldnumeric.linear_algebra import singular_value_decomposition as svd

    ## average position of each configuration
    center_x = N.sum(x) / len(x)
    center_y = N.sum(y) / len(y)

    ## svd of the correlation matrix of the centered configurations
    correlation = N.dot(N.transpose(x - center_x), y - center_y)
    v, _singular_values, u = svd(correlation)

    ## build rotation matrix and translation vector
    rotation = N.dot(v, u)
    translation = center_x - N.dot(rotation, center_y)

    return rotation, translation
def test_molTools(self):
    """molTools test"""
    from Biskit import PDBModel

    ## Loading PDB...
    self.m = PDBModel(T.testRoot() + '/lig/1A19.pdb')
    ## keep only the atoms selected by maskProtein()
    self.m = self.m.compress(self.m.maskProtein())

    hb = hbonds(self.m)

    xyz = xyzOfNearestCovalentNeighbour(40, self.m)

    if self.local:
        print '\nThe nearest covalently attached atom to the'
        print ' atom with index 40 has the coordinates:'
        print xyz

        print 'Potential h-bonds in model:'
        print '(donor index, acceptor index, distance and angle)'
        for h in hb:
            print h

        ## copies every local name (hb, xyz, h, ...) into the module
        ## namespace -- presumably for interactive inspection; renaming a
        ## local therefore changes what ends up in globals()
        globals().update(locals())

    ## fold two h-bond entries (hb[3:5]) and the coordinates into one number
    self.r = N.sum(N.ravel(hb[3:5])) + N.sum(xyz)

    ## compared to 3 decimal places
    self.assertAlmostEqual(self.r, self.EXPECT, 3)
def stable_sd(x, n_sd=3., min_length=20):
    """
    Standard deviation of x around its median, estimated while ignoring
    outliers.

    Samples shorter than min_length are not filtered: a single value gives
    0., anything else is passed straight to standardDeviation. Otherwise,
    for at most 10 rounds, values further than n_sd deviations from the
    median of the currently kept values are flagged as outliers and
    removed, until no outlier is found or the flags are the same as in the
    previous round.

    x          -- sequence of numbers
    n_sd       -- outlier threshold in units of the current deviation
    min_length -- minimal sample size for outlier removal
    """
    n_values = len(x)

    if n_values < min_length:
        if n_values == 1:
            return 0.
        return standardDeviation(x)

    data = Numeric.array(x)
    kept = data
    previous_flags = 0.

    for _round in range(10):

        mu = median(kept)
        sd = standardDeviation(kept, mu)

        flags = Numeric.greater(abs(data - mu), n_sd * sd)

        ## done if nothing is flagged ...
        if not Numeric.sum(flags):
            break

        ## ... or if the flags did not change since the last round
        if Numeric.sum(flags == previous_flags) == len(data):
            break

        kept = Numeric.compress(Numeric.logical_not(flags), data)
        previous_flags = flags

    return sd
def test_SurfaceRacer(self):
    """SurfaceRacer test"""
    from Biskit import PDBModel
    import Biskit.mathUtils as MA

    if self.local:
        print('Loading PDB...')

    pdb_file = T.testRoot() + '/lig/1A19.pdb'
    model = PDBModel(pdb_file)
    model = model.compress(model.maskProtein())

    if self.local:
        print('Starting SurfaceRacer')

    self.x = SurfaceRacer(model, 1.4, vdw_set=1, debug=self.DEBUG,
                          verbose=0)

    if self.local:
        print('Running ...')

    self.r = self.x.run()

    curvature = self.r['curvature']
    mol_surface = self.r['MS']

    if self.local:
        ## single-argument print(...) gives the same output as the
        ## Python 2 print statement
        print("Curvature: weighted mean %.6f and standard deviation %.3f"
              % (MA.wMean(curvature, mol_surface),
                 MA.wSD(curvature, mol_surface)))

        print('Relative MS of atoms 10 to 20: %s'
              % (self.r['relMS'][10:20],))

        print('Relative AS of atoms 10 to 20: %s'
              % (self.r['relAS'][10:20],))

    ## sums over atoms 10 to 20 are compared with the stored reference
    self.e = (N.sum(self.r['relMS'][10:20]),
              N.sum(self.r['relAS'][10:20]),
              N.sum(self.r['curvature'][10:20]))

    self.assertAlmostEqual(self.e, self.EXPECT)
def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area; all other atoms get the default value -1. The counts are stored
    as atom profile |profName| of the model.

    @param minasa: relative exposed surface - 0 to 100%
    @type  minasa: float
    @param radius: in Angstrom
    @type  radius: float
    @param profName: name of the atom profile that receives the counts
    @type  profName: str
    """
    heavy_xyz = N.compress( self.m.maskHeavy(), self.m.getXyz(), 0 )

    if minasa:
        ## calculate surface profiles first, if they are still missing
        if self.m.profile( 'relAS', 0 ) == 0:
            self.addASA()
        surface = self.m.profile2mask( 'relAS', minasa )
    else:
        surface = N.ones( self.m.lenAtoms() )

    squared_radius = radius**2

    ## one count per surface atom; -1 discounts the atom itself
    contacts = [
        N.sum( N.less( N.sum( (heavy_xyz - self.m.xyz[i])**2, 1 ),
                       squared_radius ) ) - 1
        for i in N.nonzero( surface ) ]

    self.m.atoms.set( profName, contacts, surface, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def linfit(x, y):
    """
    Fit a straight line through the points (x, y) by linear least squares.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    @param x: x-data
    @type  x: [ float ]
    @param y: y-data
    @type  y: [ float ]

    @return: m, n, r^2 (slope, intersection, corr. coefficient)
    @rtype: float, float, float

    @raise Exception: if x and y have different number of elements
    """
    xs = N.array(x, N.Float64)
    ys = N.array(y, N.Float64)

    count = len(xs)
    if count != len(ys):
        raise Exception('linfit: x and y must have same length')

    mean_x = N.average(xs)
    mean_y = N.average(ys)

    ## sums of squares / cross products around the means
    ss_xy = N.sum(xs * ys) - count * mean_x * mean_y
    ss_xx = N.sum(xs * xs) - count * mean_x * mean_x
    ss_yy = N.sum(ys * ys) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    intercept = mean_y - slope * mean_x
    r_squared = ss_xy**2 / (ss_xx * ss_yy)

    return slope, intercept, r_squared
def _checkOrth(self, T, TT, eps=0.0001, output=False):
    """check if the basis is orthogonal on a set of points x:
    TT == T*transpose(T) == c*Identity

    INPUT:
        T: matrix of values of polynomials calculated at common
           reference points (x)
        TT = T * transpose(T)
        eps: max numeric error (upper bound for the sum of the absolute
             off-diagonal elements of TT)
        output: if true, print a report and signal failure by returning
                False; if false, failure raises an exception

    OUTPUT:
        True if orthogonal; False if not orthogonal and output is true

    RAISES:
        ValueError (carrying the report text) if the basis is not
        orthogonal and output is false
    """
    # TTd0 = TT with 0s on the main diagonal
    TTd0 = (-1.*Numeric.identity(Numeric.shape(TT)[0])+1) * TT
    s = Numeric.sum(Numeric.sum(Numeric.absolute(TTd0)))
    minT = MLab.min(MLab.min(T))
    maxT = MLab.max(MLab.max(T))
    minTTd0 = MLab.min(MLab.min(TTd0))
    maxTTd0 = MLab.max(MLab.max(TTd0))
    if not s < eps:
        out = "NOT ORTHOG, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
        out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (minTTd0, maxTTd0, s)
        if output:
            print(out)
            return False
        # string exceptions ("raise out") are a TypeError since Python 2.6;
        # wrap the report in a real exception instead
        raise ValueError(out)
    elif output:
        out = "ORTHOGONAL, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
        out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (minTTd0, maxTTd0, s)
        print(out)
    return True
def test_SurfaceRacer(self):
    """SurfaceRacer test"""
    from Biskit import PDBModel
    import Biskit.mathUtils as MA

    if self.local:
        print('Loading PDB...')

    structure = PDBModel( T.testRoot()+'/lig/1A19.pdb' )
    structure = structure.compress( structure.maskProtein() )

    if self.local:
        print('Starting SurfaceRacer')

    self.x = SurfaceRacer( structure, 1.4, vdw_set=1,
                           debug=self.DEBUG, verbose=0 )

    if self.local:
        print('Running ...')

    self.r = self.x.run()

    curv = self.r['curvature']
    surf = self.r['MS']

    if self.local:
        ## single-argument print(...) gives the same output as the
        ## Python 2 print statement
        print("Curvature: weighted mean %.6f and standard deviation %.3f"
              % ( MA.wMean(curv, surf), MA.wSD(curv, surf) ))

        print('Relative MS of atoms 10 to 20: %s'
              % ( self.r['relMS'][10:20], ))

        print('Relative AS of atoms 10 to 20: %s'
              % ( self.r['relAS'][10:20], ))

    ## sums over atoms 10 to 20 are compared with the stored reference
    self.e = ( N.sum( self.r['relMS'][10:20] ),
               N.sum( self.r['relAS'][10:20] ),
               N.sum( self.r['curvature'][10:20] ) )

    self.assertAlmostEqual( self.e, self.EXPECT )
def measure_histogram(iterations=1000, sheet_name="V1"):
    """
    Collect sheet activity over a number of simulation steps and plot its
    histogram next to an exponential distribution with the same mean.

    The simulation is run one step at a time between state_push() and
    state_pop(); after every step contrib.jacommands.collect_activity() is
    called for sheet_name. Two figures are saved through normalize_path():
    '<time>activity_bar_histogram.png' and '<time>activity_histogram.png'.

    iterations -- number of single simulation steps to run
    sheet_name -- sheet name handed to collect_activity()

    Returns the histogram counts divided by their sum.

    NOTE(review): plasticity is switched off and on again for the sheet
    "V1" regardless of sheet_name -- confirm that this is intended.
    """
    import contrib.jacommands
    topo.sim["V1"].plastic = False
    topo.sim.state_push()
    for i in xrange(0, iterations):
        topo.sim.run(1)
        contrib.jacommands.collect_activity(sheet_name)
    topo.sim.state_pop()
    # flatten everything that was collected into one long sequence
    # NOTE(review): axis=1 is applied to flattened (1-D) arrays here --
    # confirm that the numpy version in use accepts this
    concat_activities = []
    for a in contrib.jacommands.activities:
        concat_activities = numpy.concatenate((concat_activities, a.flatten()), axis=1)
    topo.sim["V1"].plastic = True
    # reset the collected activities
    contrib.jacommands.activities = []
    pylab.figure()
    pylab.subplot(111, yscale='log')
    #pylab.subplot(111)
    print shape(concat_activities)
    # mean of the collected activity values
    mu = sum(concat_activities) / len(concat_activities)
    print mu
    # histogram with bin edges 0, 1/40, ..., 79/40 ('a' is rebound here)
    (bins, a, b) = pylab.hist(concat_activities, (numpy.arange(80.0) / 40.0) , visible=True)
    pylab.savefig(normalize_path(str(topo.sim.time()) + 'activity_bar_histogram.png'))
    bins_axis = numpy.arange(79.0) / 40.0
    # normalize the counts so that they sum to 1
    bins = bins * 1.0 / sum(bins)
    print sum(bins)
    exponential = numpy.arange(79, dtype='float32') / 40.0
    # compute the mean of the actual distribution
    #mu=0.024
    pylab.figure()
    pylab.subplot(111, yscale='log')
    print len(bins_axis)
    print len(bins)
    print bins_axis
    print bins
    print numpy.exp(- (1 / mu) * (exponential+0.025))
    print numpy.exp(- (1 / mu) * (exponential))
    # per bin: exp(-x/mu) - exp(-(x+0.025)/mu), x being the left bin edge
    exponential = - numpy.exp(- (1 / mu) * (exponential+0.025)) + numpy.exp(- (1 / mu) * (exponential))
    pylab.plot(bins_axis, bins)
    pylab.plot(bins_axis, bins, 'ro')
    pylab.plot(bins_axis, exponential)
    pylab.plot(bins_axis, exponential, 'go')
    pylab.axis(ymin=0.0000000001, ymax=100)
    #pylab.axis("tight")
    print mean(exponential)
    print mean(bins)
    #pylab.show()
    pylab.savefig(normalize_path(str(topo.sim.time()) + 'activity_histogram.png'))
    return bins
def angle(self, c1, c2, c3):
    """
    Return the angle c1-c2-c3 (vertex at c2) in degrees.

    c1, c2, c3 -- coordinate sequences of equal length
    """
    arm_a = Numeric.array(c1) - Numeric.array(c2)
    arm_b = Numeric.array(c3) - Numeric.array(c2)

    length_a = math.sqrt(Numeric.sum(arm_a * arm_a))
    length_b = math.sqrt(Numeric.sum(arm_b * arm_b))

    cosine = Numeric.dot(arm_a, arm_b) / (length_a * length_b)

    # keep the cosine inside the domain of acos
    if cosine > 1.0:
        cosine = 1.0
    elif cosine < -1.0:
        cosine = -1.0

    return math.acos(cosine) * 180 / math.pi
def parseReference(self, fpdb, dry_out=None ):
    """
    Read a reference PDB and record its size in self.lenres / self.lenatoms.

    self.lenres is only replaced if it is not yet set (it then becomes the
    sum of the residue mask of all non-solvent atoms); self.lenatoms
    becomes the number of atoms minus the atoms flagged by maskH2O().

    fpdb    -- file name of the PDB
    dry_out -- if given, the model is written to this file after removal
               of the maskH2O() atoms
    """
    flushPrint("parsing "+fpdb+"...")

    model = PDBModel( fpdb )

    solute_res = model.atom2resMask( logical_not( model.maskSolvent() ) )

    ## keep an already known residue count
    n_res = self.lenres
    if not n_res:
        n_res = sum( solute_res )
    self.lenres = n_res

    n_water_atoms = sum( model.maskH2O() )
    self.lenatoms = len( model ) - n_water_atoms

    if dry_out:
        model.remove( model.maskH2O() )
        model.writePdb( dry_out )

    flushPrint('done.\n')
def shannon_entropy(x, n_bins, _range):
    """
    Entropy -sum(p * log(p)) * bin_width of the binned density of x.

    The density comes from density(x, n_bins, range=_range, steps=0); its
    values are clipped to [1e-10, 1e10] and re-normalized before the
    logarithm is taken.
    """
    binned = density(x, n_bins, range=_range, steps=0)

    bin_width = binned[1, 0] - binned[0, 0]

    # clipping keeps log() away from empty bins
    prob = clip(binned[:, 1], 1.e-10, 1.e10)
    prob = prob / (Numeric.sum(prob) * bin_width)

    return -bin_width * Numeric.sum(prob * Numeric.log(prob))
def shannon_entropy(x, n_bins, _range):
    """
    Entropy -sum(p * log(p)) * bin_width of the binned density of x.

    The density comes from density(x, n_bins, range = _range, steps = 0);
    its values are clipped to [1e-10, 1e10] and re-normalized before the
    logarithm is taken.
    """
    table = density(x, n_bins, range = _range, steps = 0)

    width = table[1,0] - table[0,0]

    # clipping keeps log() away from empty bins
    probs = clip(table[:,1], 1.e-10, 1.e10)
    probs = probs / (Numeric.sum(probs) * width)

    entropy = - width * Numeric.sum(probs * Numeric.log(probs))

    return entropy
def parseReference(self, fpdb, dry_out=None):
    """
    Read a reference PDB and record its size in self.lenres / self.lenatoms.

    self.lenres is only replaced if it is not yet set (it then becomes the
    sum of the residue mask of all non-solvent atoms); self.lenatoms
    becomes the number of atoms minus the atoms flagged by maskH2O().

    fpdb    -- file name of the PDB
    dry_out -- if given, the model is written to this file after removal
               of the maskH2O() atoms
    """
    flushPrint("parsing " + fpdb + "...")

    reference = PDBModel(fpdb)

    solute_res = reference.atom2resMask(logical_not(reference.maskSolvent()))

    ## keep an already known residue count
    residues = self.lenres
    if not residues:
        residues = sum(solute_res)
    self.lenres = residues

    waters = sum(reference.maskH2O())
    self.lenatoms = len(reference) - waters

    if dry_out:
        reference.remove(reference.maskH2O())
        reference.writePdb(dry_out)

    flushPrint('done.\n')
def normal(self, at0, at1, at2):
    """
    Return two vectors derived from three atoms: the normalized vector
    pointing from at1 to at2 and, as array of type 'f', the normalized
    result of self.vvmult() applied to the unit vectors at0->at1 and
    at2->at1.

    Coordinates are taken from self.getTransformedCoords().
    """
    p0, p1, p2 = [self.getTransformedCoords(atom)
                  for atom in (at0, at1, at2)]

    to_first = p1 - p0
    to_last = p1 - p2

    len_first = math.sqrt(Numeric.sum(to_first * to_first))
    len_last = math.sqrt(Numeric.sum(to_last * to_last))

    #FIXME
    #protect against divide by 0
    perpendicular = self.vvmult(to_first / len_first, to_last / len_last)
    perpendicular = perpendicular / math.sqrt(
        Numeric.sum(perpendicular * perpendicular))

    return -to_last / len_last, perpendicular.astype('f')
def wVar(x, w):
    """
    Variance of weighted (w) data (x), taken around the weighted mean
    wMean(x, w).

    @param x: X-D array with numbers
    @type  x: array
    @param w: 1-D array of same length as x with weight factors
    @type  w: array

    @return: array('f') or float
    @rtype: array('f') or float
    """
    weighted_mean = wMean(x,w)

    weight_sum = N.sum(w)
    ## normalization factor sum(w) / ( sum(w)^2 - sum(w^2) )
    factor = weight_sum / ( weight_sum**2 - N.sum(w**2) )

    return factor * N.sum( w * (x - weighted_mean)**2 )
def wVar(x, w):
    """
    Variance of weighted (w) data (x), taken around the weighted mean
    wMean(x, w).

    @param x: X-D array with numbers
    @type  x: array
    @param w: 1-D array of same length as x with weight factors
    @type  w: array

    @return: array('f') or float
    @rtype: array('f') or float
    """
    center = wMean(x, w)

    total_w = N.sum(w)
    ## normalization factor sum(w) / ( sum(w)^2 - sum(w^2) )
    norm = total_w / (total_w**2 - N.sum(w**2))

    squared_dev = N.sum(w * (x - center)**2)

    return norm * squared_dev
def randomPatches(self, size, n=None, exclude=None, max_overlap=0,
                  exclude_all=None):
    """
    Collect atom patches around randomly placed centers. A patch is only
    accepted if it respects both exclusion masks; every accepted patch is
    added to |exclude| for the following candidates.

    size - int, number of atoms per patch
    n - int, number of patch centers to try (None or 0 -> 500)
    exclude - [ 1|0 ], don't touch more than |max_overlap| of these
              atoms (atom mask)
    max_overlap - int
    exclude_all - [ 1|0 ], don't touch ANY of these atoms
    -> [ [ 1|0 ] ], list of atom masks
    """
    if exclude is None:
        exclude = N.zeros(self.model.lenAtoms(), 'i')

    if exclude_all is None:
        exclude_all = N.zeros(self.model.lenAtoms(), 'i')

    n = n or 500

    centers = self.random_translations(n=n, center=self.center)

    ## start from excluded patch (if given) working outwards
    origin = centers[0]
    if N.any(exclude_all):
        origin = self.model.center(mask=exclude_all)

    centers = self.orderCenters(centers, origin)

    patches = []

    for idx in range(n):

        candidate = self.patchAround(centers[idx], size)

        if N.sum(candidate * exclude) > max_overlap:
            continue

        if N.sum(candidate * exclude_all) != 0:
            continue

        exclude = exclude + candidate
        patches.append(candidate)

    return patches
def getAngle(at1, at2, at3 ):
    """
    Return the angle at1-at2-at3 (vertex at at2) in degrees, rounded to
    5 decimal places.

    at1, at2, at3 -- objects with a 'coords' attribute (coordinate
                     sequence); the coordinates are converted to arrays
                     of type 'f'
    """
    vertex = Numeric.array(at2.coords, 'f')
    arm1 = Numeric.array(at1.coords, 'f') - vertex
    arm2 = Numeric.array(at3.coords, 'f') - vertex

    norm1 = math.sqrt(Numeric.sum(arm1*arm1))
    norm2 = math.sqrt(Numeric.sum(arm2*arm2))

    cosine = Numeric.dot(arm1, arm2)/(norm1*norm2)

    # keep the cosine inside the domain of acos
    if cosine<-1.0:
        cosine = -1.0
    elif cosine>1.0:
        cosine = 1.0

    return round(math.acos(cosine)*180./math.pi, 5)
def edge_average(a):
    """
    Return the mean value around the edge of an array.

    a -- 2-D array (an array holding a single value is returned as float)

    Every edge cell is counted with the same weight. Arrays with a single
    row or a single column are handled explicitly: previously the interior
    cells of a single-column array were counted twice (once as 'left' and
    once as 'right' edge), which skewed the result.
    """
    flat = ravel(a)
    if len(flat) < 2:
        # works for 0-d, 1-d and 1x1 input alike
        return float(flat[0])

    n_rows = len(a)
    n_cols = len(a[0])

    # top row, bottom row, and the two columns without their corners
    edges = [a[0]]
    if n_rows > 1:
        edges.append(a[-1])
    edges.append(a[1:-1, 0])
    if n_cols > 1:
        edges.append(a[1:-1, -1])

    edge_sum = sum([sum(edge) for edge in edges])
    num_values = sum([len(edge) for edge in edges])

    return float(edge_sum) / num_values
def getAngle(at1, at2, at3):
    """
    Return the angle at1-at2-at3 (vertex at at2) in degrees, rounded to
    5 decimal places.

    at1, at2, at3 -- objects with a 'coords' attribute (coordinate
                     sequence); the coordinates are converted to arrays
                     of type 'f'
    """
    center = Numeric.array(at2.coords, 'f')
    first = Numeric.array(at1.coords, 'f') - center
    second = Numeric.array(at3.coords, 'f') - center

    len_first = math.sqrt(Numeric.sum(first * first))
    len_second = math.sqrt(Numeric.sum(second * second))

    cos_angle = Numeric.dot(first, second) / (len_first * len_second)

    # keep the cosine inside the domain of acos
    if cos_angle < -1.0:
        cos_angle = -1.0
    elif cos_angle > 1.0:
        cos_angle = 1.0

    degrees = math.acos(cos_angle) * 180. / math.pi
    return round(degrees, 5)
def contactsDiff(self, ref, cutoff=None):
    """
    Number of different B{residue-residue} contacts in this and
    reference complex: all contacts found in at least one of the two
    complexes minus the result of C{contactsShared}.

    @param ref: to compare this one with
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: number of contacts different in this and reference complex.
    @rtype: int
    """
    own_contacts = self.resContacts(cutoff)
    ref_contacts = ref.resContacts(cutoff)

    ## contacts present in at least one of the two complexes
    in_either = N.logical_or(own_contacts, ref_contacts)
    n_either = N.sum(N.sum(in_either))

    return n_either - self.contactsShared(ref, cutoff)
def reduceToModel(self, xyz=None, reduce_profiles=1):
    """
    Create a reduced PDBModel from coordinates. Atom profiles of the
    source PDBModel are reduced by C{reduceAtomProfiles}; the profile
    'mass' holds the summed mass of each atom group.

    @param xyz: coordinate array (N_atoms x 3) or
                None (->use reference coordinates)
    @type  xyz: array OR None
    @param reduce_profiles: also transfer the profiles of the source
                            model (default: 1)
    @type  reduce_profiles: 1|0

    @return: PDBModel with reduced atom set and profile 'mass'
    @rtype: PDBModel
    """
    mass = self.m.atoms.get('mass')

    if xyz is None:
        xyz = self.m.getXyz()

    ## total mass of every group of atoms
    group_mass = [N.sum(N.take(mass, members)) for members in self.groups]

    reduced_xyz = self.reduceXyz(xyz)

    model = PDBModel()

    for key in self.atoms.keys():
        model.atoms.set(key, self.atoms.valuesOf(key))

    ## model.setAtoms( self.atoms )

    model.setXyz(reduced_xyz)
    model.atoms.set('mass', group_mass)

    if reduce_profiles:
        self.reduceAtomProfiles(self.m, model)

        model.residues = self.m.residues

    return model
def group(self, a_indices, maxPerCenter):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items. The items are spread as evenly
    as possible; surplus items go to the first groups.

    @param a_indices: atom indices
    @type  a_indices: [int]
    @param maxPerCenter: max entries per group
    @type  maxPerCenter: int

    @return: list of lists of int (empty list for empty input)
    @rtype: [[int],[int]..]
    """
    n_items = len(a_indices)

    ## nothing to distribute (used to end in a ZeroDivisionError)
    if n_items == 0:
        return []

    ## how many groups are necessary? (explicit integer division)
    n_centers = n_items // maxPerCenter
    if n_items % maxPerCenter:
        n_centers += 1

    ## how many items/atoms go into each group?
    ## the first |extra| groups take one item more than the others
    base, extra = divmod(n_items, n_centers)
    nAtoms = [base + 1] * extra + [base] * (n_centers - extra)

    ## distribute atom indices into groups
    result = []
    pos = 0
    for n in nAtoms:
        result += [N.take(a_indices, N.arange(n) + pos)]
        pos += n

    return result
def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from the center of mass
    of the molecule to the surface.

    @param mask: atoms to take into account; only atoms selected by both
                 mask and surf_mask are used (default: None, heavy atoms)
    @type  mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    center = model.centerOfMass()

    ## coordinates of the selected surface atoms
    selected_xyz = N.compress(mask * surf_mask, model.getXyz(), 0)

    ## distance of every selected atom from the center
    offsets = selected_xyz - center
    dist = N.sqrt(N.sum(offsets**2, 1))

    closest = min(dist)
    furthest = max(dist)

    return furthest, closest
def wpdg(series,detrend=0,win='triangle'):
    """
    Multiply a copy of series with a window function and return pdg() of
    the result, scaled by samples**2 / (samples * sum(window**2)).

    series  -- array; its first dimension is the number of samples
    detrend -- if 1, leastsquares(copy, detrend=1) is applied to the copy
               before windowing
    win     -- 'blackman', 'sin4', or anything else for a triangle window
    """
    samples = Numeric.shape(series)[0]

    ramp = Numeric.arange(0,samples,dtype='d')
    wrange = ramp / (samples - 1.0)

    if win == 'blackman':
        window = 0.42 - 0.5 * Numeric.cos(2*math.pi*wrange) \
                 + 0.08 * Numeric.cos(4*math.pi*wrange)
    elif win == 'sin4':
        window = Numeric.sin(math.pi*wrange)**4.0
    else:
        # if we don't recognize a window, default to triangle
        half_span = (samples - 1) / 2.0
        window = 1.0 - abs(ramp - half_span) / (half_span)

    windowed = series.copy()

    if detrend == 1:
        leastsquares(windowed,detrend=1)

    windowed *= window

    weight = samples * Numeric.sum(window ** 2)

    return pdg(windowed) * (1.0 * samples**2 / weight)
def non_redundant_set(d, threshold):
    """
    returns an array consisting of entries having
    a minimum pairwise distance of 'threshold'.

    Entries closer than 'threshold' count as neighbours. The entry with
    the most neighbours (random choice among ties) is removed repeatedly
    until the selected entry has no neighbour left. The result holds the
    original indices of the remaining entries.

    Based on
    Ref.: Hobohm et al. (1992). Prot. Sci. 1, 409-417
    """
    # import random

    neighbours = less(Numeric.array(d).astype(Float32), threshold)

    n_entries = shape(neighbours)[0]

    # first column keeps track of the original indices
    index_column = reshape(range(n_entries), (-1, 1))
    table = Numeric.concatenate((index_column, neighbours), 1)

    while True:

        counts = Numeric.sum(table) - 1

        if len(counts) <= 1:
            break

        highest = max(counts[1:])
        crowded = nonzero(equal(counts[1:], highest)) + 1
        candidate = random.choice(crowded)

        if not counts[candidate]:
            break

        table = deleteRowAndColumn(table, candidate - 1, candidate)

    return table[:, 0]
def reduceToModel( self, xyz=None, reduce_profiles=1 ):
    """
    Create a reduced PDBModel from coordinates. Atom profiles of the
    source PDBModel are reduced by C{reduceAtomProfiles}; the profile
    'mass' holds the summed mass of each atom group.

    @param xyz: coordinate array (N_atoms x 3) or
                None (->use reference coordinates)
    @type  xyz: array OR None
    @param reduce_profiles: also transfer the profiles of the source
                            model (default: 1)
    @type  reduce_profiles: 1|0

    @return: PDBModel with reduced atom set and profile 'mass'
    @rtype: PDBModel
    """
    atom_mass = self.m.atoms.get( 'mass' )

    if xyz is None:
        xyz = self.m.getXyz()

    ## total mass of every group of atoms
    summed_mass = [ N.sum( N.take( atom_mass, g ) ) for g in self.groups ]

    xyz_reduced = self.reduceXyz( xyz )

    reduced = PDBModel()

    for name in self.atoms.keys():
        reduced.atoms.set( name, self.atoms.valuesOf( name ) )

    ## reduced.setAtoms( self.atoms )

    reduced.setXyz( xyz_reduced )
    reduced.atoms.set( 'mass', summed_mass )

    if reduce_profiles:
        self.reduceAtomProfiles( self.m, reduced )

        reduced.residues = self.m.residues

    return reduced
def discrete_shannon_entropy(x, n_bins, _range = None):
    """
    Entropy estimate from the histogram counts of x:
    -sum( c/v * (digamma(c + 1) - digamma(v + 1)) ),
    with c the count of a bin and v the sum of all counts.

    The counts come from density(x, n_bins, _range, steps = 0, hist = 1).
    """
    from R import digamma #@UnresolvedImport

    counts = density(x, n_bins, _range, steps = 0, hist = 1)[:,1]

    total = Numeric.sum(counts)

    dims = shape(counts)
    psi = zeros(dims, Float)

    # digamma is evaluated one count at a time
    for i, count in enumerate(counts):
        psi[i] = digamma(count + 1)

    return - Numeric.sum(counts / total * (psi - digamma(total + 1)))
def test_molUtils(self):
    """molUtils test"""
    from Biskit import PDBModel

    ## load a structure and keep the atoms selected by maskProtein()
    self.m = PDBModel(t.testRoot() + '/lig/1A19.pdb')
    self.model_1 = self.m.compress(self.m.maskProtein())

    ## now sort in standard order
    self.model_2 = sortAtomsOfModel(self.model_1)

    ## compare the atom order
    order_cmp = [cmpAtoms(self.model_1.atoms[a], self.model_2.atoms[a])
                 for a in self.model_1.atomRange()]

    self.assertEqual(N.sum(order_cmp), 159)

    ## get the primary sequence as a string
    self.seq = self.model_1.sequence()

    ## convert it to a list of three letter code
    self.seq = single2longAA(self.seq)

    ## convert it to a list in one letter code
    self.seq = singleAA(self.seq)

    ## the round trip has to reproduce the original sequence
    self.assertEqual(''.join(self.seq), self.model_1.sequence())
def test_rmsFit( self ):
    """rmsFit test"""
    import Biskit.tools as T

    self.traj = T.load( T.testRoot() + '/lig_pcr_00/traj.dat' )

    reference_xyz = self.traj.ref.xyz
    last_frame_xyz = self.traj[-1].xyz

    rt, rmsdLst = match( reference_xyz, last_frame_xyz )

    if self.local:
        print( 'RMSD: %.2f' % rmsdLst[0][1] )

    ## rt[0] (the rotation matrix) and the expected value are each
    ## folded into a single number
    found = abs( N.sum( N.ravel( rt[0] )))
    expected = abs( N.sum( N.ravel( self.EXPECT )))

    self.assertAlmostEqual( found, expected, 6 )
def density(x, nBins, range = None, steps = 1, hist = 0):
    """
    returns the normalized histogram of x
    steps = 1: histogram appears as a discrete graph
    hist  = 1: keep the values returned by histogram() instead of
               normalizing them
    """
    import numpy.oldnumeric as Numeric #@Reimport

    h = histogram(x, nBins, range)
    binWidth = h[1,0] - h[0,0]

    if not hist:
        # scale the second column so that sum * binWidth equals 1
        area = Numeric.sum(h)[1]*binWidth
        h[:,1] = h[:,1]/area

    if steps:
        half = binWidth/2

        # every bin becomes two points (left and right border); the
        # outline starts and ends at zero
        outline = [(h[0][0]-half,0)]
        for center, value in h:
            outline.extend([(center-half,value), (center+half,value)])
        outline.append((h[-1][0]+half,0))

        h = outline

    return Numeric.array(h)
def mergeProfiles( self, p0, p1, maxOverlap=3 ):
    """
    Merge profile p0 with profile p1, as long as they overlap in
    at most maxOverlap positions. Positions at which both profiles are
    greater than 0 are set to 0 in both before the profiles are added.
    With a larger overlap, p0 is returned without merging.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: array
    @rtype:
    """
    p0 = self.__list2array( p0 )
    p1 = self.__list2array( p1 )

    ## positions where both profiles have a value > 0
    overlap = N.greater( N.greater(p0,0) + N.greater(p1,0), 1 )

    if N.sum( overlap ) > maxOverlap:
        return p0

    ## one of the two profiles will in most cases not belong to these
    ## positions. We can't decide which one is wrong, let's eliminate
    ## both values. Alternatively we could keep one, or the average, ..
    N.put( p1, N.nonzero( overlap ), 0 )
    N.put( p0, N.nonzero( overlap ), 0 )

    return p0 + p1
def centerSurfDist( model, surf_mask, mask=None ):
    """
    Calculate the longest and shortest distance from the center of mass
    of the molecule to the surface.

    @param mask: atoms to take into account; only atoms selected by both
                 mask and surf_mask are used (default: None, heavy atoms)
    @type  mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    com = model.centerOfMass()

    ## surface atom coordinates
    xyz_surface = N.compress( mask*surf_mask, model.getXyz(), 0 )

    ## distance of every selected atom from the center
    squared = N.sum( (xyz_surface-com)**2 , 1 )
    distances = N.sqrt( squared )

    shortest = min( distances )
    longest = max( distances )

    return longest, shortest
def pairwiseRmsd( self, aMask=None, noFit=0 ):
    """
    Calculate rmsd between each 2 coordinate frames.

    @param aMask: atom mask (default: None, use all atoms)
    @type  aMask: [1|0]
    @param noFit: if true, compare the coordinates as they are; otherwise
                  the rmsd value is taken from the result of
                  C{rmsFit.match} (default: 0)
    @type  noFit: 1|0

    @return: frames x frames array of float (symmetric, zero diagonal)
    @rtype: array
    """
    frames = self.frames

    ## identity test: "aMask != None" would be evaluated element-wise if
    ## aMask is an array
    if aMask is not None:
        frames = N.compress( aMask, frames, 1 )

    n_frames = len( frames )
    result = N.zeros( (n_frames, n_frames), N.Float32 )

    for i in range( 0, n_frames ):

        for j in range( i+1, n_frames ):

            if noFit:
                ## atom-wise distances, then root of the mean square
                d = N.sqrt( N.sum( N.power( frames[i]-frames[j], 2 ), 1 ) )
                rms = N.sqrt( N.average( d**2 ) )
            else:
                rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                rms = rmsdLst[0][1]

            result[i,j] = result[j,i] = rms

    return result
def tripples(self, lst, n):
    """
    Group items of lst into n tripples with minimal overlap.

    Starting with the first possible tripple, the next tripple is always
    the one with the lowest summed (squared) overlap to all tripples
    selected so far.
    """
    size = len(lst)

    ## get all possible tripples
    candidates = [(lst[i], lst[j], lst[k])
                  for i in range(size)
                  for j in range(i + 1, size)
                  for k in range(j + 1, size)]

    n_candidates = len(candidates)

    ## calculate pairwise "distance" between tripples
    pw = N.zeros((n_candidates, n_candidates), N.Float32)
    for a in range(n_candidates):
        for b in range(a, n_candidates):
            shared = len(MU.intersection(candidates[a], candidates[b]))**2
            pw[a, b] = pw[b, a] = shared

    current = 0
    selected = []

    while len(selected) < n:

        selected.append(current)

        ## overlap of selected tripples with all others
        overlap = N.sum(N.array([pw[s] for s in selected]))

        ## select one with lowest overlap to all tripples selected before
        current = N.argmin(overlap)

    return N.take(candidates, selected)
def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    Zeros that are left in R after the optional clipping are removed.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value 0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.;
             (0, 0) if x is 0
    @rtype: (float, float)
    """
    if clip:
        if 0 in R:
            R = N.clip( R, clip, max( R ) )
        if x == 0:
            x = clip

    ## remove 0 instead of clipping
    R = N.compress( R, R )

    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    log_R = N.log( R )
    alpha = N.average( log_R )

    n = len( R )
    squared_dev = N.sum( N.power( log_R - alpha, 2 ) )
    beta = N.sqrt( squared_dev / (n - 1.) )

    return logArea( x, alpha, beta ), logMedian( alpha )
def projectOnSphere(xyz, radius=None, center=None):
    """
    Project the coordinates xyz on a sphere with a given radius around
    a given center.

    @param xyz: cartesian coordinates
    @type  xyz: array N x 3 of float
    @param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    @type  radius: float
    @param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    @type  center: array 0 x 3 of float

    @return: array of cartesian coordinates (x, y, z)
    @rtype: array
    """
    if center is None:
        center = N.average(xyz)

    ## coordinates relative to the center of the sphere
    relative = xyz - center

    if radius is None:
        radius = max(N.sqrt(N.sum(N.power(relative, 2), 1)))

    ## same direction for every point, but the first polar component
    ## is replaced by the target radius
    polar = cartesianToPolar(relative)
    polar[:, 0] = radius

    return polarToCartesian(polar) + center
def test_molUtils( self ):
    """molUtils test"""
    from Biskit import PDBModel

    ## load a structure and keep the atoms selected by maskProtein()
    self.m = PDBModel( t.testRoot()+'/lig/1A19.pdb' )
    self.model_1 = self.m.compress( self.m.maskProtein() )

    ## now sort in standard order
    self.model_2 = sortAtomsOfModel( self.model_1 )

    ## compare the atom order
    comparison = [ cmpAtoms( self.model_1.atoms[i], self.model_2.atoms[i] )
                   for i in self.model_1.atomRange() ]

    self.assertEqual( N.sum( comparison ), 159 )

    ## get the primary sequence as a string
    self.seq = self.model_1.sequence()

    ## convert it to a list of three letter code
    self.seq = single2longAA( self.seq )

    ## convert it to a list in one letter code
    self.seq = singleAA( self.seq )

    ## the round trip has to reproduce the original sequence
    self.assertEqual( ''.join( self.seq ), self.model_1.sequence() )
def _mapToSphere (self, NewPt):
    """
    Map a window coordinate onto the unit sphere and return the result
    as a new Vector3fT.

    NewPt is scaled with self.m_AdjustWidth / self.m_AdjustHeight to the
    range [-1 ... 1]. A point outside of the unit circle is normalized
    onto the circle (z = 0); a point inside gets
    z = sqrt(1 - squared length).
    """
    X, Y, Z = 0, 1, 2

    # //Adjust point coords and scale down to range of [-1 ... 1]
    scaled = copy.copy (NewPt)
    scaled [X] = (NewPt [X] * self.m_AdjustWidth) - 1.0
    scaled [Y] = 1.0 - (NewPt [Y] * self.m_AdjustHeight)

    # //Compute the square of the length of the vector to the point from the center
    squared_length = Numeric.sum (Numeric.dot (scaled, scaled))

    result = Vector3fT ()

    if (squared_length > 1.0):
        # //The point is mapped outside of the sphere:
        # //scale it back onto the sphere (radius / sqrt(length))
        factor = 1.0 / sqrt (squared_length)
        result [X] = scaled [X] * factor
        result [Y] = scaled [Y] * factor
        result [Z] = 0.0
    else:
        # //The point is inside:
        # //z follows from sqrt(radius squared - length)
        result [X] = scaled [X]
        result [Y] = scaled [Y]
        result [Z] = sqrt (1.0 - squared_length)

    return result
def nonRedundantSet(d, threshold, distanceMatrix = 1):
    """
    returns an array consisting of entries having
    a maximum similarity (or distance) of 'threshold'.

    If distanceMatrix is false, matrix elements below 'threshold' mark two
    entries as neighbours, otherwise elements above 'threshold' do. The
    entry with the most neighbours (random choice among ties) is removed
    repeatedly until the selected entry has a neighbour count of 0. The
    result holds the original indices of the remaining entries.

    Ref.: Hobohm et al. (1992). Prot. Sci. 1, 409-417
    gives somehow weired results.
    """
    # whrandom was removed from the standard library in Python 2.5;
    # random.choice is its replacement
    import random

    d = Numeric.array(d).astype(Float32)
    if not distanceMatrix:
        d = less(d, threshold)
    else:
        d = greater(d, threshold)

    s = shape(d)
    # first column keeps track of the original indices
    d = Numeric.concatenate((reshape(range(s[0]),(-1,1)),d),1)

    ok = 1
    while ok:
        nNeighbours = Numeric.sum(d)
        if len(nNeighbours) <= 1:
            break
        maxx = max(nNeighbours[1:])
        others = Numeric.nonzero(equal(nNeighbours[1:], maxx))+1
        candidate = random.choice(others)
        ok = nNeighbours[candidate]
        if ok:
            d = deleteRowAndColumn(d, candidate-1, candidate)
    # end while

    return d[:,0]
def group( self, a_indices, maxPerCenter ):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items. The items are spread as evenly
    as possible; surplus items go to the first groups.

    @param a_indices: atom indices
    @type  a_indices: [int]
    @param maxPerCenter: max entries per group
    @type  maxPerCenter: int

    @return: list of lists of int (empty list for empty input)
    @rtype: [[int],[int]..]
    """
    total = len( a_indices )

    ## nothing to distribute (used to end in a ZeroDivisionError)
    if total == 0:
        return []

    ## how many groups are necessary? (explicit integer division)
    n_centers = total // maxPerCenter
    if total % maxPerCenter:
        n_centers += 1

    ## how many items/atoms go into each group?
    ## the first |surplus| groups take one item more than the others
    per_group, surplus = divmod( total, n_centers )
    nAtoms = [ per_group + 1 ] * surplus \
             + [ per_group ] * ( n_centers - surplus )

    ## distribute atom indices into groups
    result = []
    pos = 0
    for n in nAtoms:
        result += [ N.take( a_indices, N.arange(n) + pos) ]
        pos += n

    return result
def non_redundant_set(d, threshold):
    """
    Return the indices of entries of the pairwise distance matrix `d` that
    remain after greedily removing entries with neighbours closer than
    `threshold`.

    Based on Ref.: Hobohm et al. (1992). Prot. Sci. 1, 409-417
    """
    # NOTE(review): `random` must be available at module scope; the local
    # import was commented out in the original -- confirm.
    matrix = less(Numeric.array(d).astype(Float32), threshold)

    # first column keeps track of the original row indices
    n_rows = shape(matrix)[0]
    index_column = reshape(range(n_rows), (-1, 1))
    matrix = Numeric.concatenate((index_column, matrix), 1)

    while True:
        # neighbour count per column, minus one (presumably discounting the
        # entry's own diagonal element)
        counts = Numeric.sum(matrix) - 1
        if len(counts) <= 1:
            break

        tail = counts[1:]
        # +1 converts positions within `tail` into column numbers
        tied = nonzero(equal(tail, max(tail))) + 1
        pick = random.choice(tied)

        if not counts[pick]:
            # the chosen top candidate has no neighbours left: finished
            break
        matrix = deleteRowAndColumn(matrix, pick - 1, pick)

    return matrix[:, 0]
def nonRedundantSet(d, threshold, distanceMatrix=1):
    """
    Return the indices of a non-redundant subset of the entries of `d`.

    `d` is a square pairwise matrix. It is thresholded into a neighbour
    matrix: if `distanceMatrix` is false, values smaller than `threshold`
    count as neighbours; otherwise values greater than `threshold` do
    (i.e. a true `distanceMatrix` treats the matrix elements as
    similarities, as stated in the original documentation).

    The entry with the most neighbours (ties broken at random) is removed
    repeatedly until the randomly picked top candidate has no neighbours
    left or only the index column remains.

    Ref.: Hobohm et al. (1992). Prot. Sci. 1, 409-417

    NOTE: the original author remarked that this variant gives somewhat
    weird results.

    Returns the first column of the reduced matrix, i.e. the original row
    indices of the surviving entries.
    """
    # whrandom is no longer part of the standard library; random.choice is
    # the drop-in replacement
    import random

    d = Numeric.array(d).astype(Float32)
    if not distanceMatrix:
        d = less(d, threshold)
    else:
        d = greater(d, threshold)

    # prepend a column holding the original row indices so that they
    # survive the row/column deletions below
    s = shape(d)
    d = Numeric.concatenate((reshape(range(s[0]), (-1, 1)), d), 1)

    ok = 1
    while ok:
        # per-column neighbour counts (assumes Numeric.sum reduces over the
        # first axis); element 0 belongs to the index column and is skipped
        nNeighbours = Numeric.sum(d)
        if len(nNeighbours) <= 1:
            break

        maxx = max(nNeighbours[1:])
        # +1 translates positions in nNeighbours[1:] back to column numbers
        others = Numeric.nonzero(equal(nNeighbours[1:], maxx)) + 1
        candidate = random.choice(others)

        ok = nNeighbours[candidate]
        if ok:
            d = deleteRowAndColumn(d, candidate - 1, candidate)

    return d[:, 0]
def discrete_shannon_entropy(x, n_bins, _range=None):
    """
    Entropy estimate of the sample `x` computed from its histogram.

    The sample is binned with density(x, n_bins, _range, steps=0, hist=1)
    and the second column of the result (the per-bin values) is used as
    counts c_i with total v. The returned value is

        -sum( c_i / v * (digamma(c_i + 1) - digamma(v + 1)) )
    """
    from R import digamma  #@UnresolvedImport

    counts = density(x, n_bins, _range, steps=0, hist=1)[:, 1]
    total = Numeric.sum(counts)

    dims = shape(counts)
    psi = zeros(dims, Float)
    # digamma is applied bin by bin
    for idx in range(dims[0]):
        psi[idx] = digamma(counts[idx] + 1)

    weights = counts / total
    return -Numeric.sum(weights * (psi - digamma(total + 1)))
def density(x, nBins, range=None, steps=1, hist=0):
    """
    Return the histogram of x as an array of (position, value) rows.

    hist = 0:  the value column is divided by (sum of values * bin width),
               i.e. the histogram is normalized
    hist = 1:  the value column is returned as delivered by histogram()
    steps = 1: every bin is expanded into its left and right edge and a
               zero-height point is added at either end, so that the
               histogram appears as a discrete (step) graph
    """
    import numpy.oldnumeric as Numeric  #@Reimport

    table = histogram(x, nBins, range)
    # distance between the positions of the first two bins
    binWidth = table[1, 0] - table[0, 0]

    if not hist:
        # column-wise sum; element [1] is the total of the value column
        area = Numeric.sum(table)[1] * binWidth
        table[:, 1] = table[:, 1] / area

    if steps:
        half = binWidth / 2
        points = [(table[0][0] - half, 0)]
        for row in table:
            position, value = row[0], row[1]
            points.append((position - half, value))
            points.append((position + half, value))
        points.append((table[-1][0] + half, 0))
        table = points

    return Numeric.array(table)
def __exposedResidues(self, ASA_values, sidechainCut=0.0, backboneCut=0.0,
                      totalCut=0.0):
    """
    Decide what is a surface exposed residue and what is not.

    Column 0 of ASA_values is compared against totalCut, column 1 against
    backboneCut and column 2 against sidechainCut. A residue is flagged as
    exposed as soon as at least one of its three values exceeds the
    corresponding cutoff.

    NOTE(review): earlier documentation stated that all three values have
    to pass the test; the code accepts any single one -- confirm which
    behaviour is intended.

    @param ASA_values: array with one row per residue and one column each
                       for the total, backbone and side chain ASA (in the
                       order the cutoffs are applied here), presumably as
                       calculated in L{__read_residueASA}
    @type  ASA_values: array
    @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the backbone ASA (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the total ASA (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    columns = N.transpose(ASA_values)

    above_total = N.greater(columns[0], totalCut)
    above_backbone = N.greater(columns[1], backboneCut)
    above_sidechain = N.greater(columns[2], sidechainCut)

    ## one row per criterion, one column per residue
    passed = N.concatenate(([above_total], [above_backbone],
                            [above_sidechain]))

    ## number of criteria passed by each residue (assumes N.sum reduces
    ## over the first axis)
    n_passed = N.sum(passed)

    return N.greater(n_passed, 0)
def computeRMSD(self, listCoords):
    """rmsd <- computeRMSD(listCoords)

    Compare listCoords against the reference coordinates stored in
    self.refCoords.

    Returns the overall root mean square distance (also stored in
    self.rmsd) and sets self.distVect to the vector of distances between
    each pair of corresponding points.

    Raises ValueError if no reference coordinates are set or if the two
    coordinate lists differ in length.
    """
    reference = self.refCoords
    if reference is None:
        raise ValueError("no reference coordinates set")

    n_points = len(reference)
    if n_points != len(listCoords):
        raise ValueError("input vector length mismatch")

    delta = Numeric.array(reference) - Numeric.array(listCoords)
    # squared displacement per point: the squared components are summed
    # after transposing
    squared = Numeric.sum(Numeric.transpose(delta * delta))

    self.distVect = Numeric.sqrt(squared)
    self.rmsd = math.sqrt(Numeric.sum(squared) / n_points)
    return self.rmsd
def work(myid, numprocs, data):
    """Simple example function that slices up the data based on the values
    of numprocs and myid and processes the local slice.

    The data is cut into numprocs slices of len(data) // numprocs items;
    the last process (myid == numprocs - 1) additionally takes whatever
    remains at the end. The local average is printed.

    Returns the local average multiplied by the local slice length (i.e.
    the local sum as a float); 0.0 if the local slice is empty, which
    happens for all but the last process when len(data) < numprocs.
    """
    try:
        import numpy.oldnumeric as Numeric
    except ImportError:
        # numpy.oldnumeric is not shipped with newer NumPy releases; for
        # the 1-D slices summed here numpy.sum gives the same result
        import numpy as Numeric

    # Identify local slice and process it
    #
    interval = len(data)
    myinterval = interval // numprocs   # explicit floor division

    mylower = myid * myinterval
    if myid == numprocs - 1:
        # last process: the upper bound lies past the end so that the
        # remainder of the data is included
        myupper = interval + 1
    else:
        myupper = mylower + myinterval

    mydata = data[mylower:myupper]

    # Computation (average)
    #
    if len(mydata):
        myavg = float(Numeric.sum(mydata)) / len(mydata)
    else:
        # empty local slice: avoid dividing by zero
        myavg = 0.0

    print("P%d: %s Local avg=%.4f" % (myid, str(mydata), myavg))

    return myavg * len(mydata)
def mergeProfiles(self, p0, p1, maxOverlap=3):
    """
    Merge profile p0 with profile p1, as long as they overlap in at most
    maxOverlap positions. A position counts as overlapping if both
    profiles have a value greater than 0 there. Overlapping positions are
    set to 0 in both profiles before the profiles are added up.

    If the profiles overlap in more than maxOverlap positions, p0 is
    returned (converted by __list2array) without merging.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: merged profile, or p0 if the overlap is too large
    @rtype: array
    """
    p0 = self.__list2array(p0)
    p1 = self.__list2array(p1)

    ## positions occupied in both profiles. logical_and is used instead of
    ## adding the two masks and testing for > 1: the sum of two boolean
    ## masks saturates at True and would never exceed 1.
    overlap = N.logical_and(N.greater(p0, 0), N.greater(p1, 0))

    if N.sum(overlap) <= maxOverlap:
        ## one of the two profiles will in most cases not belong to these
        ## positions. We can't decide which one is wrong, let's eliminate
        ## both values. Alternatively we could keep one, or the average, ..
        clash = N.nonzero(overlap)
        N.put(p1, clash, 0)
        N.put(p0, clash, 0)

        p0 = p0 + p1

    return p0
def sortPoly(self, order=-1):
    """None <- sortPoly(order=-1)
    Sorts the geometry polygons according to z values of polygon's
    geometric centers. Order=-1 sorts by furthest z first, order=1 sorts
    by closest z first.

    The re-ordered faces (and, for flat shading, the matching face
    normals) are handed to self.Set(). Nothing happens if the transformed
    vertices are None or if there are no faces.
    """
    # the docstring has to be the first statement of the function,
    # otherwise it is just a discarded string expression
    if __debug__:
        if hasattr(DejaVu, 'functionName'):
            DejaVu.functionName()

    # FIXME will not work with instance matrices
    mat = self.GetMatrix()
    mat = Numeric.reshape(mat, (4, 4))
    vt = self.vertexSet.vertices * mat
    if vt is None:
        return

    face_array = self.faceSet.faces.array
    triv = Numeric.take(vt, face_array)
    # center of gravity of each face (assumes three vertices per face)
    trig = Numeric.sum(triv, 1) / 3.
    trigz = trig[:, 2]  # triangle's center of gravity z value

    ind = Numeric.argsort(trigz)  # sorted indices

    if len(self.faceSet.faces.array):
        # order=-1 walks the ascending sort backwards, order=1 forwards
        ordered = ind[::order]
        faces = Numeric.take(self.faceSet.faces.array, ordered)

        normals = None
        if self.shading == GL.GL_FLAT and self.normals is not None:
            # flat shading: the face normals have to be re-arranged too,
            # unless there is only a single normal
            if len(self.normals) > 1:
                normals = Numeric.take(self.normals, ordered)
            else:
                normals = self.normals

        self.Set(faces=faces, fnormals=normals)