def linfit(x, y):
    """
    Fit a straight line through the points (x, y) by linear least squares.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    xs = N0.array(x, N0.Float64)
    ys = N0.array(y, N0.Float64)

    count = len(xs)
    if count != len(ys):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average(xs)
    mean_y = N0.average(ys)

    ## sums of squares and cross products around the means
    ss_xy = N0.sum(xs * ys) - count * mean_x * mean_y
    ss_xx = N0.sum(xs * xs) - count * mean_x * mean_x
    ss_yy = N0.sum(ys * ys) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    inter = mean_y - slope * mean_x
    corr = ss_xy**2 / (ss_xx * ss_yy)

    return slope, inter, corr
def arrayEqual(a, b):
    """
    Compare 2 arrays or lists of numbers for equality.

    :param a: first array (multi-dimensional is supported)
    :type  a: array / list
    :param b: second array (multi-dimensional is supported)
    :type  b: array / list

    :return: true value if a and b hold the same numbers; if one of the two
             is None, the result of C{a is b}
    :rtype: 1|0
    """
    if a is None or b is None:
        return a is b

    if len(a) != len(b):
        return 0

    ## ravel accepts lists as well as arrays and flattens all dimensions
    a = N0.ravel(a)
    b = N0.ravel(b)

    ## an equal first dimension does not guarantee an equal number of
    ## elements; without this check the comparison below would broadcast
    ## (e.g. [[1, 1, 1]] versus [1]) or fail on incompatible shapes
    if len(a) != len(b):
        return 0

    return N0.sum(a == b) == len(a)
def linfit(x, y):
    """
    Fit a straight line through the points (x, y) by linear least squares.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    xs = N0.array(x, N0.Float64)
    ys = N0.array(y, N0.Float64)

    count = len(xs)
    if count != len(ys):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average(xs)
    mean_y = N0.average(ys)

    ## sums of squares and cross products around the means
    ss_xy = N0.sum(xs * ys) - count * mean_x * mean_y
    ss_xx = N0.sum(xs * xs) - count * mean_x * mean_x
    ss_yy = N0.sum(ys * ys) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    inter = mean_y - slope * mean_x
    corr = ss_xy**2 / (ss_xx * ss_yy)

    return slope, inter, corr
def arrayEqual(a, b):
    """
    Compare 2 arrays or lists of numbers for equality.

    :param a: first array (multi-dimensional is supported)
    :type  a: array / list
    :param b: second array (multi-dimensional is supported)
    :type  b: array / list

    :return: true value if a and b hold the same numbers; if one of the two
             is None, the result of C{a is b}
    :rtype: 1|0
    """
    if a is None or b is None:
        return a is b

    if len(a) != len(b):
        return 0

    ## ravel accepts lists as well as arrays and flattens all dimensions
    a = N0.ravel(a)
    b = N0.ravel(b)

    ## an equal first dimension does not guarantee an equal number of
    ## elements; without this check the comparison below would broadcast
    ## (e.g. [[1, 1, 1]] versus [1]) or fail on incompatible shapes
    if len(a) != len(b):
        return 0

    return N0.sum(a == b) == len(a)
def calc(self, models):
    """
    Calculate angles, profiles and other things needed.

    @param models: List of models
    @type  models: [ PDBModel ]
    """
    offset = 0  ## number of residues in all models handled so far

    for model in models:

        ## collect a profile for this model if a profile name is set
        if self.profileName:
            self.prof += [self.calcProfiles(model)]

        ## calculate phi and psi angles for model
        self.phi_and_psi(model)

        ## residue indices of all GLY and of all PRO in this model
        gly_atoms = model.indices(lambda a: a['residue_name'] == 'GLY')
        gly_residues = N0.array(model.atom2resIndices(gly_atoms))
        pro_atoms = model.indices(lambda a: a['residue_name'] == 'PRO')
        pro_residues = N0.array(model.atom2resIndices(pro_atoms))

        ## shift indices by the residues of the preceding models
        self.gly.append(gly_residues + offset)
        self.pro.append(pro_residues + offset)

        offset += model.lenResidues()
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    :param data: data list or array
    :type  data: [any]
    :param nbins: number of bins
    :type  nbins: int
    :param range: data range to create histogram from (min val, max val)
    :type  range: (float, float) OR None

    :return: array (nbins x 2) with the center of each bin and the number
             of values counted into it
    :rtype: array
    """
    values = N0.array(data, N0.Float)

    if range is None:
        lower = N0.minimum.reduce(values)
        upper = N0.maximum.reduce(values)
    else:
        lower, upper = range
        ## discard all values outside the requested range
        inside = N0.logical_and(N0.less_equal(values, upper),
                                N0.greater_equal(values, lower))
        values = N0.repeat(values, inside)

    bin_width = (upper - lower) / nbins

    ## bin number of every value
    bin_ids = N0.floor((values - lower) / bin_width).astype(N0.Int)

    ## count the members of each bin
    counts = N0.add.reduce(
        N0.equal(N0.arange(nbins)[:, N0.NewAxis], bin_ids), -1)

    ## values with bin number nbins are added to the last bin
    counts[-1] = counts[-1] + N0.add.reduce(N0.equal(nbins, bin_ids))

    centers = lower + bin_width * (N0.arange(nbins) + 0.5)

    return N0.transpose(N0.array([centers, counts]))
def calc(self, models):
    """
    Calculate angles, profiles and other things needed.

    @param models: List of models
    @type  models: [ PDBModel ]
    """
    offset = 0  ## number of residues in all models handled so far

    for model in models:

        ## collect a profile for this model if a profile name is set
        if self.profileName:
            self.prof += [self.calcProfiles(model)]

        ## calculate phi and psi angles for model
        self.phi_and_psi(model)

        ## residue indices of all GLY and of all PRO in this model
        gly_atoms = model.indices(lambda a: a['residue_name'] == 'GLY')
        gly_residues = N0.array(model.atom2resIndices(gly_atoms))
        pro_atoms = model.indices(lambda a: a['residue_name'] == 'PRO')
        pro_residues = N0.array(model.atom2resIndices(pro_atoms))

        ## shift indices by the residues of the preceding models
        self.gly.append(gly_residues + offset)
        self.pro.append(pro_residues + offset)

        offset += model.lenResidues()
def test_ColorSpectrum(self):
    """ColorSpectrum test"""
    ## biggles is only needed for the optional plot; any failure to load
    ## it disables plotting but must not prevent the color check below
    try:
        import biggles as B
    except Exception:
        B = 0

    c_grey = ColorSpectrum('grey', 0, 100)
    c_sausage = ColorSpectrum('sausage', 0, 100)
    c_plasma = ColorSpectrum('plasma', 0, 100)
    c_plasma2 = ColorSpectrum('plasma2', 0, 100)

    ## upper edge of each band in the plot and the spectrum drawn in it
    bands = [(1., c_grey), (0.75, c_sausage),
             (0.5, c_plasma), (0.25, c_plasma2)]

    if B:
        self.p = B.FramedPlot()

    self.result = []
    for i in range(-1, 100):

        x = (i, i + 1)

        ## collect the values to verify even if nothing can be plotted;
        ## otherwise the comparison with EXPECTED depends on biggles
        self.result += [c_grey.color(i)]

        if B:
            for height, spectrum in bands:
                self.p.add(B.FillBelow(x, (height, height),
                                       color=spectrum.color(i)))

    if B:
        for height in (1., .75, .5, 0.25, 0.0):
            self.p.add(B.Curve((0, 100), (height, height)))

        labels = [(0.9, 'grey'), (0.65, 'sausage'),
                  (0.4, 'plasma'), (0.15, 'plasma2')]
        for y, label in labels:
            self.p.add(B.PlotLabel(0.5, y, label))

    if (self.local or self.VERBOSITY > 2) and B:
        self.p.show()

    ## tolerate two differences to account for Python 3 result
    a = N0.array(self.result)
    b = N0.array(self.EXPECTED)
    ## assertTrue instead of assert_, which no longer exists in Python 3.12
    self.assertTrue(N0.count_nonzero(a - b) < 3)
def parse_result(self):
    """
    Parse the SurfaceRacer output file which has the same name as the input
    pdb, but with a txt extension. The output ends up in the same folder
    as the input. In addition a file called result.txt is created in the
    same directory as the binary.

    @return: dictionary with curvature and surface data
    @rtype: dict

    @raise SurfaceRacer_Error: if the result file cannot be read or is empty
    """
    try:
        ## the with block closes the file even if reading fails
        with open(self.f_out_name) as out_file:
            lines = out_file.readlines()
    except (IOError, OSError):
        raise SurfaceRacer_Error(
            'SurfaceRacer result file %s does not exist. You have probably '
            'encountered a very rare SurfaceRacer round off error that have '
            'caused the program to terminate. The simplest remedy to this '
            'problem is to increase the probe radii with a very small number, '
            'for example from %.3f to %.3f.'
            % (self.f_out_name, self.probe, self.probe + 0.001))

    if len(lines) == 0:
        raise SurfaceRacer_Error('SurfaceRacer result file %s empty'
                                 % self.f_out_name)

    ## don't parse cavity information: stop at the first line starting
    ## with 'CAVITY' (as before, the very first line is not examined)
    end = len(lines)
    for i in range(1, len(lines)):
        if lines[i][:6] == 'CAVITY':
            end = i
            break

    ## the values sit in fixed character columns counted from the line end
    curv = []  ## average curvature
    ms = []    ## molecular surface area
    asa = []   ## accessible surface area
    for line in lines[:end]:
        curv += [float(line[-11:-1].strip())]
        ms += [float(line[-17:-11].strip())]
        asa += [float(line[-24:-17].strip())]

    result = {'curvature': N0.array(curv),
              'MS': N0.array(ms),
              'AS': N0.array(asa),
              'surfaceRacerInfo': {'probe_radius': self.probe,
                                   'vdw_set': self.vdw_set}}

    ## check curvature profile integrity
    result['curvature'] = \
        self.__checkProfileIntegrity(result['curvature'], 1.0, -1.0)

    return result
def test_plot(self):
    """gnuplot.plot test"""
    import biskit.core.oldnumeric as N0

    ## two plots; each given by a 2d array with one (x, y) pair per row
    x = N0.arange(10)
    rising = N0.array([x, x**2])
    falling = N0.array([x, (10 - x)**2])

    plot(N0.transpose(rising), N0.transpose(falling))
def __defaults(self):
    """
    backwards compatibility to earlier pickled trajectories
    """
    ## attributes that may be missing get a None placeholder
    for name in ('pc', 'frameNames'):
        setattr(self, name, getattr(self, name, None))

    self.profiles = getattr(self, 'profiles', TrajProfiles())

    ## anything that is not exactly an ndarray is converted into one
    for name in ('frames', 'resIndex'):
        value = getattr(self, name)
        if type(value) is not N0.ndarray:
            setattr(self, name, N0.array(value))
def test_plot(self):
    """gnuplot.plot test"""
    import biskit.core.oldnumeric as N0

    ## two plots; each given by a 2d array with one (x, y) pair per row
    x = N0.arange(10)
    rising = N0.array([x, x**2])
    falling = N0.array([x, (10 - x)**2])

    plot(N0.transpose(rising), N0.transpose(falling))
def toIntArray(o):
    """
    Convert single value or list of values to numpy array of int.

    :param o: value or list (tuples and arrays are treated like lists)
    :type  o: int or [int]

    :return: array of integer
    :rtype: N0.array('i')
    """
    if isinstance(o, (list, tuple, N0.ndarray)):
        ## build a real list: under Python 3 a bare map object would be
        ## wrapped into a 0-dimensional object array
        return N0.array([int(value) for value in o])

    return N0.array([int(o)])
def toIntArray(o):
    """
    Convert single value or list of values to numpy array of int.

    :param o: value or list (tuples and arrays are treated like lists)
    :type  o: int or [int]

    :return: array of integer
    :rtype: N0.array('i')
    """
    if isinstance(o, (list, tuple, N0.ndarray)):
        ## build a real list: under Python 3 a bare map object would be
        ## wrapped into a 0-dimensional object array
        return N0.array([int(value) for value in o])

    return N0.array([int(o)])
def __init__(self, rec_model=None, lig_model=None, ligMatrix=None,
             info=None):
    """
    @param rec_model: model of original receptor conformation
    @type  rec_model: PDBModel OR XplorModel
    @param lig_model: model of original ligand conformation
    @type  lig_model: PDBModel OR XplorModel
    @param ligMatrix: Numeric array 4 by 4, ligand transformation matrix
                      (default: None, identity matrix)
    @type  ligMatrix: matrix
    @param info: optional dictionary with additional infos
                 e.g. {'eshape':-123.3, 'rms':12.2 } (default: None)
    @type  info: dict
    """
    self.rec_model = rec_model  # XplorModel object for receptor
    self.lig_model = lig_model  #            "      for ligand
    self.lig_transformed = None  #           "      with transformed coordinates
    self.pw_dist = None  # cached pw atom distances rec x lig

    ## default info record, extended by the caller's entries; None rather
    ## than a shared {} instance is used as default argument
    self.info = {'date': t.dateSortString()}
    if info is not None:
        self.info.update(info)

    self.ligandMatrix = ligMatrix
    if self.ligandMatrix is None:
        ## no transformation given: use the identity matrix
        self.ligandMatrix = N0.array([[1, 0, 0, 0],
                                      [0, 1, 0, 0],
                                      [0, 0, 1, 0],
                                      [0, 0, 0, 1]], N0.Float32)

    ## compressed by slim
    self.contacts = None

    ## version as of creation of this object
    self.initVersion = biskit.__version__
def tripples(self, lst, n):
    """
    Group items of lst into n tripples with minimal overlap.

    Starts with the first possible tripple and then repeatedly adds the
    tripple with the lowest summed overlap to all tripples picked so far.
    """
    size = len(lst)

    ## get all possible tripples
    candidates = [(lst[i], lst[j], lst[k])
                  for i in range(size)
                  for j in range(i + 1, size)
                  for k in range(j + 1, size)]
    total = len(candidates)

    ## calculate pairwise "distance" between tripples
    pw = N0.zeros((total, total), N0.Float32)
    for i in range(total):
        for j in range(i, total):
            shared = len(MU.intersection(candidates[i], candidates[j]))**2
            pw[i, j] = shared
            pw[j, i] = shared

    chosen = []
    pos = 0
    while len(chosen) < n:
        chosen.append(pos)

        ## overlap of selected tripples with all others
        overlap = N0.sum(N0.array([pw[i] for i in chosen]))

        ## select one with lowest overlap to all tripples selected before
        pos = N0.argmin(overlap)

    return N0.take(candidates, chosen)
def valuesOf(self, infoKey, default=None, indices=None, unique=0):
    """
    Get all values of a certain info record of all or some Complexes.

    @param infoKey: key for info dict
    @type  infoKey: str
    @param default: default value if infoKey is not found (None)
    @type  default: any
    @param indices: list of int OR None(=all), indices of Complexes (None)
    @type  indices: [int] OR None
    @param unique: report each value only once (set union), (default 0)
    @type  unique: 1|0

    @return: list of values
    @rtype: [any]
    """
    l = self
    if indices is not None:
        l = N0.take(N0.array(l, 'O'), indices)

    values = [c.info.get(infoKey, default) for c in l]

    if not unique:
        return values

    ## keep the first occurrence of every value; the value stored is the
    ## one tested, so a missing key yields `default` exactly once
    r = []
    for value in values:
        if value not in r:
            r.append(value)

    return r
def residusMaximus(self, atomValues, mask=None):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    :param atomValues: list 1 x N, values per atom
    :type  atomValues: [ float ]
    :param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    :type  mask: [1|0]

    :return: Numpy array 1 x N of float
    :rtype: array
    """
    if mask is None:
        mask = N0.ones(len(self.frames[0]), N0.Int32)

    ## eliminate all values that do not belong to the selected atoms;
    ## both arguments are converted so that two plain lists work, too
    masked = N0.array(atomValues) * N0.array(mask)

    ## residue index of every atom, looked up once instead of once per
    ## residue
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range(0, resMap[-1] + 1):

        ## get atom entries for this residue
        resAtoms = N0.compress(N0.equal(resMap, res), masked)

        ## get maximum value
        masterValue = max(resAtoms)

        result.extend(resAtoms * 0.0 + masterValue)

    return N0.array(result)
def takeMembers(self, mIndices):
    """
    Take all frames belonging to the members in mIndices::
      takeMembers( mIndices ) -> EnsembleTraj with frames of given members

    :param mIndices: list of member indices
    :type  mIndices: [int] OR array('i')

    :return: EnsembleTraj with specified members
    :rtype: EnsembleTraj

    :raise EnsembleTrajError: if a TypeError occurs while collecting frames

    @todo: return self.__class__ instead of EnsembleTraj
    """
    try:
        ## assumes that each member traj has same number of frames
        per_member = N0.array([self.memberIndices(i) for i in mIndices])
        frame_ids = N0.ravel(N0.transpose(per_member))

        n_members = len(mIndices)

        ## has wrong n_members and member order
        taken = self.takeFrames(frame_ids)

        ## copy the state into a fresh ensemble with the correct size
        result = EnsembleTraj(n_members=n_members)
        result.__dict__.update(taken.__dict__)
        result.n_members = n_members
        result.resetFrameNames()

        return result

    except TypeError:
        sizes = "\nlenFrames: %i; n_members: %i" % (len(self),
                                                    self.n_members)
        raise EnsembleTrajError('takeMembers TypeError ' +
                                str(mIndices) + sizes)
def tripples(self, lst, n):
    """
    Group items of lst into n tripples with minimal overlap.

    Starts with the first possible tripple and then repeatedly adds the
    tripple with the lowest summed overlap to all tripples picked so far.
    """
    size = len(lst)

    ## get all possible tripples
    candidates = [(lst[i], lst[j], lst[k])
                  for i in range(size)
                  for j in range(i + 1, size)
                  for k in range(j + 1, size)]
    total = len(candidates)

    ## calculate pairwise "distance" between tripples
    pw = N0.zeros((total, total), N0.Float32)
    for i in range(total):
        for j in range(i, total):
            shared = len(MU.intersection(candidates[i], candidates[j]))**2
            pw[i, j] = shared
            pw[j, i] = shared

    chosen = []
    pos = 0
    while len(chosen) < n:
        chosen.append(pos)

        ## overlap of selected tripples with all others
        overlap = N0.sum(N0.array([pw[i] for i in chosen]))

        ## select one with lowest overlap to all tripples selected before
        pos = N0.argmin(overlap)

    return N0.take(candidates, chosen)
class Test(BT.BiskitTest):
    """Test case"""

    ## reference the first returned matrix is compared against
    EXPECT = N0.array([[0.9999011, 0.01311352, 0.00508244],
                       [-0.01310219, 0.99991162, -0.00225578],
                       [-0.00511157, 0.00218896, 0.99998454]])

    def test_rmsFit(self):
        """rmsFit test"""
        from . import tools as T

        self.traj = T.load(T.testRoot('lig_pcr_00/traj.dat'))

        reference = self.traj.ref.xyz
        last_frame = self.traj[-1].xyz
        rt, rmsdLst = match(reference, last_frame)

        if self.local:
            print('RMSD: %.2f' % rmsdLst[0][1])

        ## compare the absolute sums over all elements of the rotation
        ## matrix and of the expected matrix
        found = abs(N0.sum(N0.ravel(rt[0])))
        expected = abs(N0.sum(N0.ravel(self.EXPECT)))

        self.assertAlmostEqual(found, expected, 6)
def crd2traj(self):
    """
    Convert coordinates into a Trajectory object.

    :return: trajectory object
    :rtype: Trajectory
    """
    ## skip first empty line
    self.crd.readline()

    frames = []

    if self.verbose:
        self.log.write("Reading frames ..")

    ## read until nextFrame signals the end of the input
    try:
        while True:
            frames.append(self.nextFrame())

            ## progress mark after every 100 frames
            if len(frames) % 100 == 0 and self.verbose:
                self.log.write('#')

    except EOFError:
        if self.verbose:
            self.log.write("Read %i frames.\n" % len(frames))

    result = Trajectory(refpdb=self.ref)
    result.frames = N0.array(frames).astype(N0.Float32)
    result.setRef(self.ref)
    result.ref.disconnect()

    return result
def crd2traj(self):
    """
    Convert coordinates into a Trajectory object.

    :return: trajectory object
    :rtype: Trajectory
    """
    ## skip first empty line
    self.crd.readline()

    frames = []

    if self.verbose:
        self.log.write("Reading frames ..")

    ## read until nextFrame signals the end of the input
    try:
        while True:
            frames.append(self.nextFrame())

            ## progress mark after every 100 frames
            if len(frames) % 100 == 0 and self.verbose:
                self.log.write('#')

    except EOFError:
        if self.verbose:
            self.log.write("Read %i frames.\n" % len(frames))

    result = Trajectory(refpdb=self.ref)
    result.frames = N0.array(frames).astype(N0.Float32)
    result.setRef(self.ref)
    result.ref.disconnect()

    return result
def valuesOf(self, infoKey, default=None, indices=None, unique=0):
    """
    Get all values of a certain info record of all or some Complexes.

    @param infoKey: key for info dict
    @type  infoKey: str
    @param default: default value if infoKey is not found (None)
    @type  default: any
    @param indices: list of int OR None(=all), indices of Complexes (None)
    @type  indices: [int] OR None
    @param unique: report each value only once (set union), (default 0)
    @type  unique: 1|0

    @return: list of values
    @rtype: [any]
    """
    l = self
    if indices is not None:
        l = N0.take(N0.array(l, 'O'), indices)

    values = [c.info.get(infoKey, default) for c in l]

    if not unique:
        return values

    ## keep the first occurrence of every value; the value stored is the
    ## one tested, so a missing key yields `default` exactly once
    r = []
    for value in values:
        if value not in r:
            r.append(value)

    return r
def parse_result(self):
    """
    Parse the SurfaceRacer output file which has the same name as the input
    pdb, but with a txt extension. The output ends up in the same folder
    as the input. In addition a file called result.txt is created in the
    same directory as the binary.

    @return: dictionary with curvature and surface data
    @rtype: dict

    @raise SurfaceRacer_Error: if the result file cannot be read or is empty
    """
    try:
        ## the with block closes the file even if reading fails
        with open(self.f_out_name) as out_file:
            lines = out_file.readlines()
    except (IOError, OSError):
        raise SurfaceRacer_Error(
            'SurfaceRacer result file %s does not exist. You have probably '
            'encountered a very rare SurfaceRacer round off error that have '
            'caused the program to terminate. The simplest remedy to this '
            'problem is to increase the probe radii with a very small number, '
            'for example from %.3f to %.3f.'
            % (self.f_out_name, self.probe, self.probe + 0.001))

    if len(lines) == 0:
        raise SurfaceRacer_Error('SurfaceRacer result file %s empty'
                                 % self.f_out_name)

    ## don't parse cavity information: stop at the first line starting
    ## with 'CAVITY' (as before, the very first line is not examined)
    end = len(lines)
    for i in range(1, len(lines)):
        if lines[i][:6] == 'CAVITY':
            end = i
            break

    ## the values sit in fixed character columns counted from the line end
    curv = []  ## average curvature
    ms = []    ## molecular surface area
    asa = []   ## accessible surface area
    for line in lines[:end]:
        curv += [float(line[-11:-1].strip())]
        ms += [float(line[-17:-11].strip())]
        asa += [float(line[-24:-17].strip())]

    result = {'curvature': N0.array(curv),
              'MS': N0.array(ms),
              'AS': N0.array(asa),
              'surfaceRacerInfo': {'probe_radius': self.probe,
                                   'vdw_set': self.vdw_set}}

    ## check curvature profile integrity
    result['curvature'] = \
        self.__checkProfileIntegrity(result['curvature'], 1.0, -1.0)

    return result
def __setstate__(self, state):
    """
    called for unpickling the object.

    Restores the instance dictionary from state and converts the ligand
    matrix into a Float32 array.
    """
    self.__dict__ = state

    matrix = N0.array(self.ligandMatrix, N0.Float32)
    self.ligandMatrix = matrix

    ## backwards compability
    self.__defaults()
def data4clustering(self):
    """
    Apply current atom mask and return list of flattened/raveled frames.
    Override this method if clustering should happen by other criteria.

    @return: [float] or numpy.array(float)
    """
    masked = self.traj.compressAtoms(self.aMask)

    ## one flat coordinate vector per frame
    flat_frames = [N0.ravel(frame) for frame in masked.frames]

    return N0.array(flat_frames)
def __setstate__(self, state):
    """
    called for unpickling the object.

    Restores the instance dictionary from state and converts the ligand
    matrix into a Float32 array.
    """
    self.__dict__ = state

    matrix = N0.array(self.ligandMatrix, N0.Float32)
    self.ligandMatrix = matrix

    ## backwards compability
    self.__defaults()
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).

    (originally taken from biskit.Statistics.ROCalyzer)

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array(curve)

    ## both columns must exist
    ys = curve[:, 1]
    xs = curve[:, 0]

    assert len(N0.shape(curve)) == 2

    ## apply boundaries
    ## here we have a problem with flat curves
    in_range = N0.greater_equal(xs, start) & N0.less_equal(xs, stop)
    points = N0.compress(in_range, curve, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    first = N0.array([[start, points[0, 1]]])
    last = N0.array([[stop, points[-1, 1]]])
    points = N0.concatenate((first, points, last))

    x = points[:, 0]
    y = points[:, 1]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    rectangles = y[:-1] * dx  # the rectangles between all points
    triangles = dx * dy / 2.0  # the triangles between all points

    return N0.sum(rectangles) + N0.sum(triangles)
def relExposure(model, absSurf, key='AS', clip=1):
    """
    Calculate how exposed an atom is relative to the same
    atom in a GLY-XXX-GLY tripeptide, an approximation of
    the unfolded state.

    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0

    @return: rel - list of relative accessible surfaces
    @rtype: [float]

    @raise Exception: if key is neither 'MS' nor 'AS'
    """
    if key not in ('MS', 'AS'):
        raise Exception('Incorrect key for relative exposiure: %s ' % key)

    rel = []
    atom_pos = 0  ## position of the current atom in the whole model

    ## loop over chains
    for chain_id in range(model.lenChains()):
        chain = model.takeChains([chain_id])
        cIdx = chain.resIndex()

        ## and loop over atoms in chain (k: position within the chain)
        for k, a in enumerate(chain.atoms.iterDicts()):
            in_nter = k < cIdx[1]    ## N-terminal residue
            in_cter = k >= cIdx[-1]  ## C-terminal residue

            if in_nter:
                rel = __Nter(a, rel, absSurf, key, atom_pos)
            if in_cter:
                rel = __Cter(a, rel, absSurf, key, atom_pos)
            ## everything but N- and C termini
            if not (in_nter or in_cter):
                rel = __bulk(a, rel, absSurf, key, atom_pos)

            atom_pos += 1

    result = N0.array(rel)
    if clip:
        return N0.clip(result, 0.0, 100.0)
    return result
def relExposure(model, absSurf, key='AS', clip=1):
    """
    Calculate how exposed an atom is relative to the same
    atom in a GLY-XXX-GLY tripeptide, an approximation of
    the unfolded state.

    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0

    @return: rel - list of relative accessible surfaces
    @rtype: [float]

    @raise Exception: if key is neither 'MS' nor 'AS'
    """
    if key not in ('MS', 'AS'):
        raise Exception('Incorrect key for relative exposiure: %s ' % key)

    rel = []
    atom_pos = 0  ## position of the current atom in the whole model

    ## loop over chains
    for chain_id in range(model.lenChains()):
        chain = model.takeChains([chain_id])
        cIdx = chain.resIndex()

        ## and loop over atoms in chain (k: position within the chain)
        for k, a in enumerate(chain.atoms.iterDicts()):
            in_nter = k < cIdx[1]    ## N-terminal residue
            in_cter = k >= cIdx[-1]  ## C-terminal residue

            if in_nter:
                rel = __Nter(a, rel, absSurf, key, atom_pos)
            if in_cter:
                rel = __Cter(a, rel, absSurf, key, atom_pos)
            ## everything but N- and C termini
            if not (in_nter or in_cter):
                rel = __bulk(a, rel, absSurf, key, atom_pos)

            atom_pos += 1

    result = N0.array(rel)
    if clip:
        return N0.clip(result, 0.0, 100.0)
    return result
def __collectFrames(self, pdbs, castAll=0):
    """
    Read coordinates from list of pdb files.

    :param pdbs: list of file names
    :type  pdbs: [str]
    :param castAll: analyze atom content of each frame for casting
                    (default: 0)
    :type  castAll: 0|1

    :return: frames x (N x 3) Numpy array (of float)
    :rtype: array

    :raise TrajError: if a frame does not match the reference
    """
    frameList = []
    atomCast = None

    if self.verbose:
        T.errWrite('reading %i pdbs...' % len(pdbs))

    refNames = self.ref.atomNames()  ## cache for atom checking

    for count, f in enumerate(pdbs):

        ## Load
        m = PDBModel(f)

        ## compare atom order & content of first frame to reference pdb
        if castAll or count == 0:
            atomCast, castRef = m.compareAtoms(self.ref)

            if castRef != list(range(len(self.ref))):
                ## we can take away atoms from each frame but not from ref
                raise TrajError("Reference PDB doesn't match %s."
                                % m.fileName)

            if N0.all(atomCast == list(range(len(m)))):
                atomCast = None  ## no casting necessary
            elif self.verbose:
                T.errWrite(' casting ')

        ## assert that frame fits reference
        if atomCast:
            m = m.take(atomCast)

        ## additional check on each 100st frame
        if count % 100 == 0 and m.atomNames() != refNames:
            raise TrajError("%s doesn't match reference pdb." % m.fileName)

        frameList.append(m.xyz)

        ## progress mark after every 10 frames
        if (count + 1) % 10 == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite('done\n')

    ## convert to 3-D Numpy Array
    return N0.array(frameList).astype(N0.Float32)
def removeMembers(self, indices):
    """
    Remove given member trajectories from this ensemble.

    :param indices: trajectory (member) numbers; numbers that do not
                    occur in this ensemble are ignored
    :type  indices: [int]
    """
    ## list.remove() deletes a single item and cannot take a whole array
    ## of indices, so the members to keep are filtered explicitly
    drop = set(int(i) for i in indices)
    keep = [i for i in range(self.n_members) if i not in drop]

    self.keepMembers(keep)
def removeFrames(self, indices):
    """
    Remove given frames from this trajectory object.

    :param indices: frame numbers; numbers that do not occur in this
                    trajectory are ignored
    :type  indices: [int]
    """
    ## list.remove() deletes a single item and cannot take a whole array
    ## of indices, so the frames to keep are filtered explicitly
    drop = set(int(i) for i in indices)
    keep = [i for i in range(self.lenFrames()) if i not in drop]

    self.keepFrames(keep)
def test_mathUtils(self):
    """mathUtils.polar/euler test"""
    ## Calculating something ..
    self.d = N0.array([[20., 30., 40.], [23., 31., 50.]])

    ## convert to polar coordinates and back again
    polar = cartesianToPolar(self.d)
    self.a = polarToCartesian(polar)

    first = self.a[0]
    self.t = eulerRotation(first[0], first[1], first[2])

    deviation = N0.sum(SD(self.a))
    self.assertAlmostEqual(deviation, self.EXPECT)
def test_mathUtils(self):
    """mathUtils.polar/euler test"""
    ## Calculating something ..
    self.d = N0.array([[20., 30., 40.], [23., 31., 50.]])

    ## convert to polar coordinates and back again
    polar = cartesianToPolar(self.d)
    self.a = polarToCartesian(polar)

    first = self.a[0]
    self.t = eulerRotation(first[0], first[1], first[2])

    deviation = N0.sum(SD(self.a))
    self.assertAlmostEqual(deviation, self.EXPECT)
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).

    (originally taken from biskit.Statistics.ROCalyzer)

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array(curve)

    ## both columns must exist
    ys = curve[:, 1]
    xs = curve[:, 0]

    assert len(N0.shape(curve)) == 2

    ## apply boundaries
    ## here we have a problem with flat curves
    in_range = N0.greater_equal(xs, start) & N0.less_equal(xs, stop)
    points = N0.compress(in_range, curve, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    first = N0.array([[start, points[0, 1]]])
    last = N0.array([[stop, points[-1, 1]]])
    points = N0.concatenate((first, points, last))

    x = points[:, 0]
    y = points[:, 1]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    rectangles = y[:-1] * dx  # the rectangles between all points
    triangles = dx * dy / 2.0  # the triangles between all points

    return N0.sum(rectangles) + N0.sum(triangles)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.

    The last atom of every given chain is looked up and translated back
    into a chain index, this time with chain breaks taken into account.
    An empty chains argument is returned unchanged.
    """
    if len(chains) == 0:
        return chains

    ## fetch last atom of given chains: the atom right before the start
    ## of the following chain (or before the end of the model)
    chain_starts = model.chainIndex()
    boundaries = N0.concatenate((chain_starts, [len(model)]))
    following = N0.array(chains) + 1
    last_atoms = N0.take(boundaries, following) - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(last_atoms, breaks=1)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.

    The last atom of every given chain is looked up and translated back
    into a chain index, this time with chain breaks taken into account.
    An empty chains argument is returned unchanged.
    """
    if len(chains) == 0:
        return chains

    ## fetch last atom of given chains: the atom right before the start
    ## of the following chain (or before the end of the model)
    chain_starts = model.chainIndex()
    boundaries = N0.concatenate((chain_starts, [len(model)]))
    following = N0.array(chains) + 1
    last_atoms = N0.take(boundaries, following) - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(last_atoms, breaks=1)
def parse_result(self):
    """
    Parse the Prosa2003 output file.

    @return: dictionary with the calculated potential profiles
             and the parameters used
    @rtype: dict

    @raise IOError: if the output file cannot be read or is empty
    """
    prosaout = self.prosaOutput + '.ana'

    try:
        ## the with block closes the file again after reading
        with open(prosaout) as f:
            lines = f.readlines()

        if not lines:
            raise IOError('File %s is empty' % (prosaout))

    except IOError as why:
        raise IOError("Couldn't read Prosa result: " + str(why)
                      + '\n Check the Prosa license!')

    prosa_pair = []
    prosa_surf = []
    prosa_tot = []

    for line in lines:
        ## skip blank lines and comment lines, which start with '#'
        if not line.strip() or line[0] == '#':
            continue

        nr, pair, surf, tot = line.split()
        prosa_pair += [float(pair)]
        prosa_surf += [float(surf)]
        prosa_tot += [float(tot)]

    ## create dictionary with residue profiles and calc. info
    result = {'prosa_pair': N0.array(prosa_pair),
              'prosa_surf': N0.array(prosa_surf),
              'prosa_tot': N0.array(prosa_tot),
              'ProsaInfo': {'lower_k': self.lower_k,
                            'upper_k': self.upper_k,
                            'pot_lb': self.pot_lb,
                            'pot_ub': self.pot_ub}}
    return result
def __getstate__(self):
    """
    Called before pickling the object.

    Frames held as a list or as an array with dtype char 'd' are first
    converted to a Float32 array. A failing conversion only produces a
    warning.

    :return: the instance dictionary
    :rtype: dict
    """
    try:
        if isinstance(self.frames, list) or self.frames.dtype.char == 'd':
            EHandler.warning("Converting coordinates to float array.")
            self.frames = N0.array(self.frames).astype(N0.Float32)
    except Exception:
        ## best effort only, but do not swallow KeyboardInterrupt or
        ## SystemExit as a bare except would
        EHandler.warning('Could not convert frames to float array.', 1)

    return self.__dict__
def runningAverage(x, interval=2, preserve_boundaries=0):
    """
    Running average (smoothing) over a given data window.

    :param x: data
    :type  x: list of int/float
    :param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2)
    :type  interval: int
    :param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    :type  preserve_boundaries: 0|1

    :return: list of floats (x itself if interval is 0)
    :rtype: [ float ]
    """
    if interval == 0:
        return x

    half = int((interval - 1) / 2)  ## window reach to either side
    size = len(x)
    cut_only = not preserve_boundaries

    averages = []
    for pos in range(size):
        first = pos - half
        last = pos + half + 1

        if cut_only:
            ## cut off whatever reaches beyond the data
            first = max(0, first)
            last = min(size, last)
        else:
            ## shrink the window on both sides by the overhang
            if first < 0:
                last = last + first
                first = 0
            if last > size:
                first = first + last - size
                last = size

        averages.append(N0.average(x[first:last]))

    return N0.array(averages)
def runningAverage(x, interval=2, preserve_boundaries=0):
    """
    Running average (smoothing) over a given data window.

    :param x: data
    :type  x: list of int/float
    :param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2)
    :type  interval: int
    :param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    :type  preserve_boundaries: 0|1

    :return: list of floats (x itself if interval is 0)
    :rtype: [ float ]
    """
    if interval == 0:
        return x

    half = int((interval - 1) / 2)  ## window reach to either side
    size = len(x)
    cut_only = not preserve_boundaries

    averages = []
    for pos in range(size):
        first = pos - half
        last = pos + half + 1

        if cut_only:
            ## cut off whatever reaches beyond the data
            first = max(0, first)
            last = min(size, last)
        else:
            ## shrink the window on both sides by the overhang
            if first < 0:
                last = last + first
                first = 0
            if last > size:
                first = first + last - size
                last = size

        averages.append(N0.average(x[first:last]))

    return N0.array(averages)
def averageContacts(self, step=10, cutoff=4.5):
    """
    Use::
      averageContacts( step=1, cutoff=4.5 )

    @param step: take only each |step|th frame (default: 10)
    @type  step: int
    @param cutoff: distance cutoff in Angstrom (default: 4.5)
    @type  cutoff: float

    @return: contact matrix with frequency of each contact in
             (thinned) traj.
    @rtype: matrix
    """
    ## contacts of every step-th frame
    frame_ids = range(0, len(self), step)
    contacts = [self.atomContacts(i, cutoff=cutoff) for i in frame_ids]

    summed = N0.sum(N0.array(contacts))

    return summed / (1. * len(contacts))
def rtTuple2matrix(self, r, t):
    """
    Put rotation and translation matrix into single 4x4 matrix.

    @param r: rotation matric, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## create 3 x 4 matrix: the translation is appended to the rotation
    ## as an additional column
    t_column = N0.transpose([t.tolist()])
    upper = N0.concatenate((r, t_column), 1)

    ## make it square
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)
    square = N0.concatenate((upper, bottom), 0)

    return square.astype(N0.Float32)
def __random_matrix(self):
    """
    Random rotation matrix.

    @return: 4 x 4 array of float, random rotation and translation matrix
    @rtype: array
    """
    rotation = ma.randomRotation()
    translation = self.__random_translation()

    ## create 3 x 4 matrix: the translation is appended to the rotation
    ## as an additional column
    t_column = N0.transpose([translation.tolist()])
    upper = N0.concatenate((rotation, t_column), 1)

    ## make it square
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((upper, bottom), 0)
def hex2int( shex ):
    """
    Convert hex-code string into an integer number.

    :param shex: hex-code, e.g. 'FF0B99'; upper and lower case digits are
                 accepted and any '0x' / '0X' marker is ignored
    :type  shex: str

    :return: integer value of the hex code (0 for an empty string)
    :rtype: int

    :raise KeyError: if shex contains a character that is not a hex digit
    """
    ## lower-case first so that a '0X' prefix is stripped just like '0x'
    digits = shex.lower().replace( '0x', '' )

    table = { c : value for value, c in enumerate( '0123456789abcdef' ) }

    ## accumulate with plain python integers, which never overflow
    ## (fixed-width array arithmetic silently wraps for long codes)
    result = 0
    for c in digits:
        result = result * 16 + table[ c ]

    return result
def variance(x, avg=None):
    """
    Variance, S{sigma}^2

    Sum of squared deviations from the average, divided by
    C{ len(x) - 1 }. A single data point has a variance of 0.0.

    :param x: data
    :type  x: array('f') or [ float ]
    :param avg: use this average, otherwise calculated from x
    :type  avg: float OR None

    :return: float
    :rtype: float
    """
    center = N0.average(x) if avg is None else avg

    if len(x) == 1:
        return 0.0

    deviations = N0.array(x) - center
    degrees_of_freedom = len(x) - 1.

    return N0.sum(N0.power(deviations, 2)) / degrees_of_freedom
def variance(x, avg = None):
    """
    Variance, S{sigma}^2

    :param x: data
    :type  x: array('f') or [ float ]
    :param avg: use this average, otherwise calculated from x
    :type  avg: float OR None

    :return: squared deviations from the average, summed up and divided
             by (number of values - 1); 0.0 for a single value
    :rtype: float
    """
    if avg is None:
        avg = N0.average(x)

    count = len(x)
    if count == 1:
        ## a single point does not deviate from its own average
        return 0.0

    squared = N0.power(N0.array(x) - avg, 2)
    return N0.sum(squared) / (count - 1.)
def rtTuple2matrix(self, r, t):
    """
    Put rotation and translation matrix into single 4x4 matrix.

    The translation is attached as an extra column to the right of the
    rotation and the row (0, 0, 0, 1) is attached below.

    @param r: rotation matrix, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## 3 x 4 part: 0:3, 0:3 contains rot; 0:3, 3 contains trans
    rot_trans = N0.concatenate((r, N0.transpose([t.tolist()])), 1)

    ## last row turns it into a square matrix
    last_row = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((rot_trans, last_row), 0).astype(N0.Float32)
def transform( self, *rt ):
    """
    Apply given transformation to all frames (in place).

    Accepts either one combined matrix (rotation taken from [0:3, 0:3],
    translation from [0:3, 3]) or two separate arguments, rotation and
    translation.

    :param rt: rotation translation matrix
    :type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
    """
    if len(rt) == 2:
        r, t = rt
    else:
        matrix = rt[0]
        r = matrix[0:3, 0:3]
        t = matrix[0:3, 3]

    ## each frame is the left operand of the product below, hence the
    ## transposed rotation
    r = N0.transpose( r ).astype( N0.Float32 )
    t = t.astype( N0.Float32 )

    for i in range( len( self.frames ) ):
        rotated = N0.array( N0.dot( self.frames[i], r ) )
        self.frames[ i ] = rotated + t
def __init__(self, data, n_cluster, weight, seedx = 0, seedy = 0):
    """
    Store the data and the clustering parameters on the instance.

    @param data: cluster this (must be 2-dimensional: points x dimensions)
    @type  data: [float] OR array
    @param n_cluster: number of clusters
    @type  n_cluster: int
    @param weight: fuzziness weight
    @type  weight: float
    @param seedx: random seed value for RandomArray.seed (default: 0)
    @type  seedx: int OR 0
    @param seedy: random seed value for RandomArray.seed
                  (default: 0, set seed from clock)
    @type  seedy: int OR 0
    """
    ## keep a float copy of the input and remember its two dimensions
    self.data = N0.array(data, N0.Float)
    n_points, n_dimensions = N0.shape(data)
    self.npoints = n_points
    self.dimension = n_dimensions

    ## clustering parameters
    self.w = weight
    self.n_cluster = n_cluster

    ## seeds are only stored here
    self.seedx = seedx
    self.seedy = seedy
def test_AmberParmMirror(self):
    """AmberParmBuilder.parmMirror test

    Removes everything flagged by maskH2O() from the reference, runs
    parmMirror and parm2pdb on the remainder and compares the
    coordinates of the resulting PDB with those of self.refdry.
    """
    source = self.ref

    ## keep protein and Na+ ion
    not_water = N0.logical_not( source.maskH2O() )
    self.mdry = source.compress( not_water )

    self.a = AmberParmBuilder( self.mdry,
                               verbose=self.local,
                               leap_out=self.leapout,
                               debug=self.DEBUG )

    self.a.parmMirror( f_out=self.dryparm, f_out_crd=self.drycrd )
    self.a.parm2pdb( self.dryparm, self.drycrd, self.drypdb )

    self.m1 = PDBModel( self.drypdb )
    self.m2 = PDBModel( self.refdry )

    ## every single coordinate has to be identical
    same_xyz = N0.array( self.m1.xyz == self.m2.xyz )
    self.assertTrue( same_xyz.all() )
def __init__(self, values):
    """
    Set up a plot with one filled cell per entry of values and a few
    value labels along the y axis.

    @param values: color mapping for each color used
    @type  values: [(float, int)]

    @raise ImportError: if the biggles module is not available
    """
    if not biggles:
        raise ImportError('biggles module could not be imported.')

    FramedPlot.__init__(self)

    values = N0.array(values)
    n_colors = len(values)

    self.frame.draw_spine = 1

    n_values = 4 ## number of labeled ticks in legend
    step = n_colors // (n_values - 1) + 1

    ## every step-th entry plus the very last one gets a label
    ## NOTE(review): if the stepping already ends on the last entry, that
    ## entry is listed twice -- confirm that the repeated label is intended
    indices = list(range(0, n_colors, step)) + [n_colors - 1]
    labels = ['%.1f' % values[i, 0] for i in indices]

    self.y.ticks = len(labels)
    self.y1.ticklabels = labels
    self.y2.draw_ticks = 0
    self.x.draw_ticks = 0
    self.x.ticklabels = []

    ## stack one cell per color, starting at y = 2, between x = 2 and x = 3
    x = (2, 3)
    for bottom, (_value, color) in enumerate(values, 2):
        top = bottom + 1
        cell = biggles.FillBetween(x, (bottom, bottom), x, (top, top),
                                   color = int(color))
        self.add(cell)
def density(x, nBins, range = None, steps = 1, hist = 0):
    """
    returns the normalized histogram of x::
      density( data, nBins [,range=None, steps=1, hist=0|1] ) -> array

    The histogram itself is taken from histogram(); the bin width is the
    distance between its first two rows, so at least two bins are needed.

    :param x: data list or array
    :type  x: [any]
    :param nBins: number of bins
    :type  nBins: int
    :param range: data range to create histogram from (min val, max val)
    :type  range: (float, float) OR None
    :param steps: 1: histogram appears as a discrete graph, i.e. every bin
                  is replaced by two points half a bin width to either
                  side and one point with value 0 is added at each end
                  (default 1)
    :type  steps: 1|0
    :param hist: 0: normalize histogram (default 0)
    :type  hist: 1|0

    :return: array with one row per point; column 0 is the position,
             column 1 the (normalized) value
    :rtype: array
    """
    h = histogram(x, nBins, range)
    binWidth = h[1,0] - h[0,0]

    if not hist:
        ## N0.sum(h)[1] is read as the total of the value column
        ## (relies on N0.sum adding up along the first axis)
        area = N0.sum(h)[1] * binWidth
        h[:,1] = h[:,1] / area

    if not steps:
        return N0.array(h)

    half = binWidth / 2

    ## start at 0, draw a flat top across every bin, end at 0
    points = [ (h[0][0] - half, 0) ]
    for row in h:
        points.extend( [ (row[0] - half, row[1]),
                         (row[0] + half, row[1]) ] )
    points.append( (h[-1][0] + half, 0) )

    return N0.array(points)