def usr_descriptors(points):
    """Return a 12-tuple of geometric descriptors for ``points``.

    Four reference locations are used one after the other:

      1. the centroid of ``points``
      2. the point closest to the centroid
      3. the point farthest from the centroid
      4. the point farthest from point 3

    For each of them the distances to all points are condensed into three
    numbers by ``mean_var_skew``; the result is the concatenation of those
    four triples, in the order listed above.

    Reference for method:
    Ballester, PJ & Richards, WG (2007) Proc.R.Soc.A
    doi:10.1098/rspa.2007.1823
    """
    def _describe(origin):
        # distances from one reference location plus their three statistics
        distances = dist_array(origin, points)
        first, second, third = mean_var_skew(distances)
        return distances, (first, second, third)

    from_centroid, ctr = _describe(centroid(points))
    # closest to centroid
    _unused, cst = _describe(points[N.argmin(from_centroid)])
    # farthest from centroid
    from_farthest, fct = _describe(points[N.argmax(from_centroid)])
    # farthest from the point that is farthest from the centroid
    _unused, ftf = _describe(points[N.argmax(from_farthest)])

    return ctr + cst + fct + ftf
def getFreq(self, seconds):
    """
    Take one frequency reading lasting ``seconds``.

    If ``self.fake`` is true nothing is read; a random frequency around
    300 is made up instead (within +-50 in about one call out of five,
    within +-200 otherwise).

    Otherwise the samples from ``self.read(seconds)`` are run through a
    real FFT and the strongest component is looked up twice: once inside
    the 20..700 band (presumably Hz -- confirm) and once over the whole
    spectrum.  Index 0 of the transform is skipped in both searches and
    returned separately.

    @param seconds: duration handed to self.read(); also the factor used
                    to convert between spectrum positions and frequencies
    @return: (distance, freq, value, transform[0], bestFreq, bestValue),
             where distance = freq * 0.0051 - 0.0472; the fake path
             returns (distance, freq, 1, 1, 1, 1).  None is returned when
             the band collapses to zero width.
    """
    if self.fake:
        # same draw order as always: first the coin flip, then the offset
        spread = 50 if random.random() < .2 else 200
        freq = 300 + randint(-spread, spread)
        return (freq * 0.0051 - 0.0472, freq, 1, 1, 1, 1)

    samples = self.read(seconds)
    self.timestamp = time.time()
    spectrum = FFT.real_fft(samples).real

    # band limits expressed as positions in the spectrum
    lowPos = max(0, int(20 * seconds))
    highPos = min(int(self.sample_rate * seconds), int(700 * seconds))
    if lowPos == highPos:
        # NOTE(review): sampleTime is not defined in this function --
        # presumably a module-level name; confirm it exists.
        self.lastFreq = int(self.sample_rate * sampleTime / 2.0)
        return
    if lowPos > highPos:
        lowPos, highPos = highPos, lowPos

    # strongest component inside the band
    band = spectrum[1 + lowPos:highPos]
    peak = Numeric.argmax(band)
    value = band[peak]
    freq = int((peak + lowPos) / seconds)
    distance = freq * 0.0051 - 0.0472

    # strongest component anywhere except index 0
    everything = spectrum[1:]
    bestPeak = Numeric.argmax(everything)
    return (distance, freq, value, spectrum[0],
            int(bestPeak / seconds), everything[bestPeak])
def computeEndPointsFromChunk(self, chunk, update = True):
    """
    Derive the endpoints and radius of a Peptide chunk from a cylinder
    that bounds its atoms around chunk.axis.

    @param chunk: a Peptide chunk
    @type  chunk: Chunk

    @param update: if true, the computed endpoints are also stored with
                   self.setEndPoints()
    @type  update: bool

    @return: endPoint1, endPoint2 and radius; None if chunk has no atoms
    @rtype: Point, Point and float

    @note: computing the endpoints works fine when n=m or m=0. Otherwise,
           the endpoints can be slightly off the central axis, especially
           if the Peptide is short.

    @attention: endPoint1 and endPoint2 may not be the original endpoints,
                and they may be flipped (opposites of) the original
                endpoints.
    """
    # Since chunk.axis is not always one of the vectors chunk.evecs
    # (actually chunk.poly_evals_evecs_axis[2]), it's best to just use
    # the axis and center, then recompute a bounding cylinder.
    if not chunk.atoms:
        return None

    unit_axis = norm(chunk.axis) # normalizing is needed
    center = chunk._get_center()

    # atom positions relative to center, rotated so the axis lies along z
    # (compare this Numeric code to findAtomUnderMouse and its caller)
    rotated = dot(chunk.atpos - center, matrix_putting_axis_at_z(unit_axis))

    # radius: largest xy distance between the axis line and an atom center.
    # argmax is used rather than max() to be sure Numeric code does the work.
    dist2 = rotated[:,0]**2 + rotated[:,1]**2
    radius = sqrt(dist2[argmax(dist2)])

    # extent along the axis; center is not assumed to sit in the middle
    along = rotated[:,2]
    low_z = along[argmin(along)]
    high_z = along[argmax(along)]

    # Pull both ends inwards such that the ladder rungs (rings) fall on
    # the ring segments.
    # TO DO: Fix drawPeptideLadder() to offset the first ring, then this
    # adjustment can be removed. --Mark 2008-04-12
    inset = self.getEndPointZOffset()
    endpoint1 = center + (low_z + inset) * unit_axis
    endpoint2 = center + (high_z - inset) * unit_axis

    if update:
        self.setEndPoints(endpoint1, endpoint2)

    return (endpoint1, endpoint2, radius)
def compute_memo(self, chunk):
    """
    Precompute whatever drawing chunk in this display mode can reuse, and
    return it.

    Preference values needed here should be read as env.prefs[key]; doing
    so causes the memo to be removed (invalidated) when the user changes
    that preference.

    The result is assumed to depend on, and only on, chunk's appearance in
    ordinary display modes (i.e. it is invalidated whenever havelist is).
    That cannot be changed yet, so bugs will occur if ordinarily invisible
    chunk info affects this rendering, and possible optimizations are
    missed if ordinarily visible info does not show in this rendering.
    If necessary this can be fixed by doing the real work in class Chunk's
    _recompute_ rules, with this function or drawchunk just accessing (and
    sometimes triggering) that result, and by adding the needed
    invalidation to the appropriate setter methods of class Chunk. If the
    real work can depend on more than chunk's ordinary appearance, that
    access has to live in drawchunk; otherwise it can be in drawchunk or
    in this method.

    @return: (end1, end2, radius, color) describing a cylinder, or None
             if chunk has no atoms
    """
    # For this example the chunk axes are turned into a cylinder.
    # chunk.axis is not always one of the vectors chunk.evecs (actually
    # chunk.poly_evals_evecs_axis[2]), so just use the axis and center and
    # recompute a bounding cylinder.
    if not chunk.atoms:
        return None

    # normalized in case chunk.axis is not already of unit length
    axis = norm(chunk.axis)
    center = chunk.center

    # positions relative to center, rotated so the axis lies along z
    # (compare this Numeric code to findAtomUnderMouse and its caller)
    rotated = dot(chunk.atpos - center, matrix_putting_axis_at_z(axis))

    # largest squared xy distance between the axis line and an atom center;
    # argmax is used instead of max() to be sure Numeric code does the work
    dist2 = rotated[:, 0]**2 + rotated[:, 1]**2
    radius = sqrt(dist2[argmax(dist2)])

    # limits along the axis (center need not be halfway between them)
    along = rotated[:, 2]
    low_z = along[argmin(along)]
    high_z = along[argmax(along)]

    bcenter = chunk.abs_to_base(center)

    color = chunk.color
    if color is None:
        color = V(0.5, 0.5, 0.5)

    # Keep the cylinder longer than zero even for a single-atom chunk by
    # adding a small margin all around (deliberately not enough to enclose
    # all atoms entirely).
    margin = 0.2
    return (bcenter + (low_z - margin) * axis,
            bcenter + (high_z + margin) * axis,
            radius + margin,
            color)
def compute_memo(self, chunk):
    """
    Return info precomputed from chunk's appearance that speeds up drawing
    chunk in this display mode.

    If the computation needs preference values, read them as
    env.prefs[key]; that causes the memo to be removed (invalidated) when
    the user changes the preference.

    The computation is assumed to depend on, and only on, chunk's
    appearance in ordinary display modes (i.e. it's invalidated whenever
    havelist is). There is not yet any way to change that, so bugs will
    occur if any ordinarily invisible chunk info affects this rendering,
    and potential optimizations will not be done if any ordinarily visible
    info is not visible in this rendering. Both can be fixed if necessary
    by doing the real work within class Chunk's _recompute_ rules (this
    function or drawchunk then only accesses the result, sometimes causing
    its recomputation) and by adding the needed invalidation to the
    appropriate setter methods of class Chunk. If the real work can depend
    on more than chunk's ordinary appearance, the access needs to be in
    drawchunk; otherwise it can be in drawchunk or here.

    @return: tuple (end1, end2, radius, color) of a bounding cylinder, or
             None for a chunk without atoms
    """
    # for this example, we'll turn the chunk axes into a cylinder.
    # Since chunk.axis is not always one of the vectors chunk.evecs
    # (actually chunk.poly_evals_evecs_axis[2]), it's best to just use the
    # axis and center, then recompute a bounding cylinder.
    if not chunk.atoms:
        return None

    unit = norm(chunk.axis) # needed unless it is surely unit length already
    center = chunk.center
    offsets = chunk.atpos - center
    # compare following Numeric Python code to findAtomUnderMouse and its caller
    v = dot(offsets, matrix_putting_axis_at_z(unit))

    # radius = sqrt of the largest squared xy distance from the axis line
    # (argmax rather than max() so that Numeric code is used for sure)
    xy2 = v[:,0]**2 + v[:,1]**2
    widest = argmax(xy2)
    radius = sqrt(xy2[widest])

    # extent along the axis, without assuming center is in the middle
    z = v[:,2]
    z_first = z[argmin(z)]
    z_last = z[argmax(z)]

    bcenter = chunk.abs_to_base(center)

    color = chunk.color
    if color is None:
        color = V(0.5,0.5,0.5)

    # add a small margin all around so that even a single-atom chunk gets
    # a cylinder longer than zero (intentionally not sufficient to enclose
    # all atoms entirely)
    margin = 0.2
    end1 = bcenter + (z_first - margin) * unit
    end2 = bcenter + (z_last + margin) * unit
    return (end1, end2, radius + margin, color)
def centerFrames( self ):
    """
    Get indices for frame nearest to each cluster center.

    Computed as the position of the largest entry along axis 1 of
    self.memberships().

    @return: list of cluster center indices
    @rtype: [int]
    """
    msm = self.memberships()
    return N.argmax( msm, 1 )
def centerFrames(self):
    """
    Get indices for frame nearest to each cluster center, i.e. for every
    row of self.memberships() the index of its largest value.

    @return: list of cluster center indices
    @rtype: [int]
    """
    memberships = self.memberships()
    nearest = N.argmax(memberships, 1)
    return nearest
def findQuaternionMatrix(collection, point_ref, conf1, conf2 = None, matrix = True ):
    """
    Build the symmetric 4x4 matrix of a mass-weighted quaternion
    superposition between two conformations of collection and return the
    eigenvector belonging to its smallest eigenvalue as a Quaternion.

    Atom positions are taken relative to point_ref instead of a center of
    mass.

    @param collection: object providing universe(), mass() and atomList()
    @param point_ref: object whose position() serves as origin in both
                      conformations
    @param conf1: first conformation; must belong to collection's universe
    @param conf2: second conformation, or None. With None, conf1 is used
                  as the conformation to be fitted and None is passed on
                  as the reference configuration.
    @param matrix: selects which tuple is returned (see below)

    @return: if matrix is true:
                 (Quaternion, v, e, e[i], e[emax], rms)
             where e are all eigenvalues, v the eigenvector of the
             smallest one (index i) and emax the index of the largest;
             otherwise:
                 (Quaternion, Vector(ref_cms), Vector(pos), rms)
    @raise ValueError: if a given conformation is for a different universe
    """
    universe = collection.universe()
    if conf1.universe != universe:
        raise ValueError, "conformation is for a different universe"
    if conf2 is None:
        # only one conformation given: it becomes conf2 and conf1 is None
        conf1, conf2 = conf2, conf1
    else:
        if conf2.universe != universe:
            raise ValueError, "conformation is for a different universe"
    ref = conf1
    conf = conf2
    # masses as fractions of the total mass of collection
    weights = universe.masses()
    weights = weights/collection.mass()
    # NOTE(review): position() is called without ref here -- presumably the
    # current configuration; confirm this is intended when conf2 is given.
    ref_cms = point_ref.position().array
    # NOTE(review): this zero vector is overwritten by the next statement.
    pos = N.zeros((3,), N.Float)
    pos = point_ref.position(conf).array
    possq = 0.
    cross = N.zeros((3, 3), N.Float)
    # weighted sums of squared lengths (possq) and of the outer products of
    # fitted and reference vectors (cross), both relative to point_ref
    for a in collection.atomList():
        r = a.position(conf).array - pos
        r_ref = a.position(ref).array-ref_cms
        w = weights[a]
        possq = possq + w*N.add.reduce(r*r) \
                      + w*N.add.reduce(r_ref*r_ref)
        cross = cross + w*r[:, N.NewAxis]*r_ref[N.NewAxis, :]
    # upper triangle of the 4x4 matrix, filled from the entries of cross
    k = N.zeros((4, 4), N.Float)
    k[0, 0] = -cross[0, 0]-cross[1, 1]-cross[2, 2]
    k[0, 1] = cross[1, 2]-cross[2, 1]
    k[0, 2] = cross[2, 0]-cross[0, 2]
    k[0, 3] = cross[0, 1]-cross[1, 0]
    k[1, 1] = -cross[0, 0]+cross[1, 1]+cross[2, 2]
    k[1, 2] = -cross[0, 1]-cross[1, 0]
    k[1, 3] = -cross[0, 2]-cross[2, 0]
    k[2, 2] = cross[0, 0]-cross[1, 1]+cross[2, 2]
    k[2, 3] = -cross[1, 2]-cross[2, 1]
    k[3, 3] = cross[0, 0]+cross[1, 1]-cross[2, 2]
    # mirror the upper triangle into the lower one
    for i in range(1, 4):
        for j in range(i):
            k[i, j] = k[j, i]
    k = 2.*k
    # NOTE(review): r is already taken relative to pos above, yet |pos|^2 is
    # subtracted from the diagonal here as well -- confirm this is intended.
    for i in range(4):
        k[i, i] = k[i, i] + possq - N.add.reduce(pos*pos)
    import numpy.oldnumeric.linear_algebra as LinearAlgebra
    e, v = LinearAlgebra.eigenvectors(k)
    # keep the eigenvector of the smallest eigenvalue ...
    i = N.argmin(e)
    v = v[i]
    # ... with its sign chosen such that the first component is not negative
    if v[0] < 0: v = -v
    # rms is the square root of that eigenvalue, 0 if it is not positive
    if e[i] <= 0.:
        rms = 0.
    else:
        rms = N.sqrt(e[i])
    if matrix:
        emax = N.argmax(e)
        QuatMatrix = v
        return Quaternion.Quaternion(QuatMatrix),v, e, e[i],e[emax], rms
    else:
        return Quaternion.Quaternion(v), Vector(ref_cms), Vector(pos), rms
def argmax(self, key):
    """
    Find the item whose value for key is the largest.

    @param key: item attribute
    @type  key: any

    @return: index of item with highest item[key] value
    @rtype: int
    """
    return N.argmax(self.valuesOf(key))
def coef_maxCut(self, appxCoef):
    """Zero all coefficients that come after the abs. max. coefficient.

    For every row (one row per curve) the coefficient with the largest
    absolute value is located, ignoring the first coefficient in that
    search; everything up to and including it is kept, everything after
    it is set to zero.

    accepts 2d matrix of coefficients where rows represent different curves
    """
    assert len(appxCoef.shape) == 2
    nCoef = Numeric.shape(appxCoef)[1]

    # column of the largest |coefficient| per row; the search starts at
    # column 1, hence the + 1 to get back to indices of the full row
    magnitudes = Numeric.absolute(appxCoef[:, 1:])
    cutColumn = Numeric.argmax(magnitudes, 1) + 1

    def atOrBelowDiagonal(i, j):
        return i >= j

    # row m of this square matrix is true in columns 0..m only
    keepUpTo = Numeric.fromfunction(atOrBelowDiagonal, (nCoef, nCoef))
    # pick, for every curve, the row that matches its cut column
    selector = Numeric.take(keepUpTo, cutColumn, 0)
    return appxCoef * selector
def coef_maxCut(self, appxCoef):
    """returns the coefficients up to the abs. max. coefficient unchanged
    and all later ones as zero; the first coefficient is excluded from
    finding the max.
    accepts 2d matrix of coefficients where rows represent different curves
    """
    assert len(appxCoef.shape) == 2
    width = Numeric.shape(appxCoef)[1]
    # lower-triangular (diagonal included) truth table of size width x width
    lowerTriangle = Numeric.fromfunction(lambda row, col: row >= col,
                                         (width, width))
    # per curve: index of the largest |coefficient| among columns 1..width-1
    lastKept = 1 + Numeric.argmax(Numeric.absolute(appxCoef[:,1:]), 1)
    return appxCoef * Numeric.take(lowerTriangle, lastKept, 0)
def argmax( self, infoKey ):
    """
    Get index of complex c with highest c.infos[infokey] value

    @param infoKey: key for info dict
    @type  infoKey: str

    @return: index of complex c with highest c.infos[infokey] value
    @rtype: int
    """
    return N.argmax( self.valuesOf( infoKey ) )
def get_max_distance_residue(self, residue):
    """
    Get the residue with maximum distance from supplied residue,
    from the data members already computed by calc_dist_matrix()

    Parameters:
       residue = Bio.PDB residue to get max distance to

    Uses data members (readonly):
       index_map
       reverse_index_map
       dist_matrix

    Return value:
       Bio.PDB residue that has max distance from supplied residue
    """
    # row of the distance matrix that belongs to the supplied residue
    distances = self.dist_matrix[self.index_map[residue]]
    # the column of its largest entry is mapped back to a residue
    return self.reverse_index_map[Numeric.argmax(distances)]
def memberFrames(self, threshold=0.):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
             (each inner list as returned by self.membershipSort)
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N.argmax(msm, 0)

    ## frames grouped by their best cluster, as plain lists
    r = [N.nonzero(N.equal(maxMemb, i)).tolist()
         for i in range(0, self.n_clusters)]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [N.nonzero(N.greater(l, threshold)) for l in msm]

        ## add only additional frames; r[i] already is a list at this
        ## point, so no conversion (and no error trapping) is needed
        for i in range(0, len(r)):
            known = set(r[i])
            r[i] = r[i] + [fr for fr in r2[i] if fr not in known]

    ## sort frames within each cluster by their membership
    return [self.membershipSort(r[i], i) for i in range(0, len(r))]
def memberFrames( self, threshold=0. ):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
             (each inner list as returned by self.membershipSort)
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N.argmax( msm, 0 )

    ## one plain list of frame indices per cluster
    r = [ N.nonzero( N.equal(maxMemb, i) ).tolist()
          for i in range(0, self.n_clusters) ]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [ N.nonzero( N.greater( l, threshold) ) for l in msm ]

        ## add only additional frames (r[i] is a list already; the set
        ## only serves the membership test)
        for i in range(0, len( r ) ):
            present = set( r[i] )
            r[i] = r[i] + [ fr for fr in r2[i] if fr not in present ]

    ## sort frames within each cluster by their membership
    r = [ self.membershipSort( r[i], i) for i in range(0, len(r) )]

    return r
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emission scores from the result file self.f_out.
    Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file does not exist or
                       T.fileLength() reports less than 10 for it
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )

    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    profileDic = {}

    ## read result; the handle is closed even if reading fails
    hmm = open( self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    ## collect some data about the hmm profile
    ## (every header field is the second token of its first match)
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = \
        int( re.findall( r'LENG\s+[0-9]+', out )[0].split()[1] )
    profileDic['accession'] = \
        re.findall( r'ACC\s+PF[0-9]+', out )[0].split()[1]
    profileDic['NrSeq'] = \
        int( re.findall( r'NSEQ\s+[0-9]+', out )[0].split()[1] )
    profileDic['AA'] = \
        re.findall( 'HMM[ ]+' + '[A-Y][ ]+'*20, out )[0].split()[1:]

    ## collect null emission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
    nullEmm = [ float(j) for j in re.findall(pattern, out)[0].split()[1:] ]

    ## get emission scores: one row per profile position, made of the
    ## position number followed by 20 scores
    prob = []
    for i in range(1, profileDic['profLength']+1):
        pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
        prob.append( [ float(j)
                       for j in re.findall(pattern, out)[0].split() ] )

    profileDic['seqNr'] = N.transpose( N.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N.array(prob)[:,1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = [ N.resize( self.entropy(e, nullProb), (1,20) )[0]
            for e in emmProb ]
    profileDic['ent'] = N.array(ent)

    ###### TEST #####

    proba = N.array(prob)[:,1:]

    # test set all to N.sum( abs( probabilities ) )
    profileDic['absSum'] = [ N.resize( N.sum( N.absolute( row )),
                                       N.shape( row ) )
                             for row in proba ]

    # set all to normalized max score
    # NOTE(review): math.SD is not part of the standard math module, so
    # `math` presumably is a project module here -- confirm.
    p4 = []
    for row in proba:
        p_scale = (row - N.average(row) )/ math.SD(row)
        p4.append( N.resize( p_scale[N.argmax( N.array(p_scale) )],
                             N.shape( row ) ) )
    profileDic['maxAllScale'] = p4

    return profileDic
def second_peak_bin(self):
    """
    Return the bin with the second peak in the distribution.
    Unlike second_max_value_bin(), it does not return a bin which is the
    second largest value, if laying on a wing of the first peak, the second
    peak is returned only if the distribution is truly multimodal. If it
    isn't, return the first peak (for compatibility with numpy array type,
    and plotting compatibility), however the corresponding selectivity will
    be forced to 0.0

    The bins are visited in sorted order and treated as circular (walking
    past the last bin continues at the first one).
    """
    data = self._data
    l = len( data )
    # list() keeps indexing and sorting valid where keys()/values() are
    # views rather than lists
    keys = list( data.keys() )
    if l <= 1:
        return keys[ 0 ]

    ks = sorted( keys )
    ik0 = ks.index( keys[ argmax( list( data.values() ) ) ] )
    k0 = ks[ ik0 ]
    v0 = data[ k0 ]

    def _end_of_wing( step ):
        # Walk away from the first peak in direction step while the values
        # do not rise; return the index where they rise again, or None if
        # the walk arrives back at the peak.
        v = v0
        k = k0
        ik = ik0
        while data[ k ] <= v:
            ik = ( ik + step ) % l
            if ik == ik0:
                return None
            v = data[ k ]
            k = ks[ ik ]
        return ik

    ik1 = _end_of_wing( 1 )
    if ik1 is None:
        return k0
    ik2 = _end_of_wing( -1 )
    if ik2 is None:
        return k0

    if ik1 == ik2:
        return ks[ ik1 ]

    # largest value on the circular stretch from ik1 up to (not
    # including) ik2
    ik = ik1
    m = 0
    while ik != ik2:
        k = ks[ ik ]
        if data[ k ] > m:
            m = data[ k ]
            im = ik
        ik = ( ik + 1 ) % l

    return ks[ im ]
def max_value_bin(self):
    """Return the bin with the largest value."""
    # list() keeps the lookup valid where keys()/values() are views that
    # cannot be indexed; both are read from the same unchanged dict, so
    # their orders correspond.
    bins = list(self._data.keys())
    return bins[argmax(list(self._data.values()))]
def farthest(point, points):
    """works, but @@DEPRECATED!!
    return index into points of farthest-from-point
    """
    # position of the largest entry in the distances from point to points
    return N.argmax(dist_array(point, points))
def max(self):
    """
    Position of the maximum of the distribution: the entry of self.x at
    which self.p is highest (not the height self.p reaches there).
    """
    return self.x[N.argmax(self.p)]
def second_peak_bin(self, d):
    """
    Return the bin with the second peak in the distribution d.
    Unlike second_max_value_bin(), it does not return a bin which is the
    second largest value, if laying on a wing of the first peak, the second
    peak is returned only if the distribution is truly multimodal. If it
    isn't, return the first peak (for compatibility with numpy array type,
    and plotting compatibility), however the corresponding selectivity will
    be forced to 0.0

    The bins of d._data are visited in sorted order and treated as
    circular (walking past the last bin continues at the first one).
    """
    h = d._data
    l = len(h)
    # list() keeps indexing and sorting valid where keys()/values() are
    # views rather than lists
    keys = list(h.keys())
    if l <= 1:
        return keys[0]

    ks = sorted(keys)
    ik0 = ks.index(keys[argmax(list(h.values()))])
    k0 = ks[ik0]
    v0 = h[k0]

    def _end_of_wing(step):
        # Walk away from the first peak in direction step while the values
        # do not rise; return the index where they rise again, or None if
        # the walk arrives back at the peak.
        v = v0
        k = k0
        ik = ik0
        while h[k] <= v:
            ik = (ik + step) % l
            if ik == ik0:
                return None
            v = h[k]
            k = ks[ik]
        return ik

    ik1 = _end_of_wing(1)
    if ik1 is None:
        return k0
    ik2 = _end_of_wing(-1)
    if ik2 is None:
        return k0

    if ik1 == ik2:
        return ks[ik1]

    # largest value on the circular stretch from ik1 up to (not
    # including) ik2
    ik = ik1
    m = 0
    while ik != ik2:
        k = ks[ik]
        if h[k] > m:
            m = h[k]
            im = ik
        ik = (ik + 1) % l

    return ks[im]
def getrotation(self): #bruce 050518 new feature for showing rotation of rmotor in its cap-arrow
    """
    Return a rotation angle for the motor. This is arbitrary, but rotates smoothly
    with the atoms, averaging out their individual thermal motion.
    It is not history-dependent -- e.g. it will be consistent regardless of how you jump around
    among the frames of a movie. But if we ever implement remaking or revising the motor position,
    or if you delete some of the motor's atoms, this angle is forgotten and essentially resets to 0.
    (That could be fixed, and the angle even saved in the mmp file, if desired. See code comments
    for other possible improvements.)

    The first call only records the atom positions (relative to self.center) and returns 0.0;
    later calls compare the current positions against that record.

    @return: the angle in degrees, in the range [0, 360); 0.0 if the positions
             are unchanged or were recorded by this very call
    """
    # possible future enhancements:
    # - might need to preserve rotation when we forget old posns, by setting an arb offset then;
    # - might need to preserve it in mmp file??
    # - might need to draw it into PovRay file??
    # - might need to preserve it when we translate or rotate entire jig with its atoms (doing which is NIM for now)
    # - could improve and generalize alg, and/or have sim do it (see comments below for details).
    #
    posns = A(map( lambda a: a.posn(), self.atoms ))
    posns -= self.center
    if self._initial_posns is None:
        # (we did this after -= center, so no need to forget posns if we translate the entire jig)
        self._initial_posns = posns # note, we're storing *relative* positions, in spite of the name!
        self._initial_quats = None # compute these the first time they're needed (since maybe never needed)
        return 0.0 # returning this now (rather than computing it below) is just an optim, in theory
    assert len(self._initial_posns) == len(posns), "bug in invalidating self._initial_posns when rmotor atoms change"
    if not (self._initial_posns != posns): # have to use not (x != y) rather than (x == y) due to Numeric semantics!
        # no (noticeable) change in positions - return quickly
        # (but don't change stored posns, in case this misses tiny changes which could accumulate over time)
        # (we do this before the subsequent stuff, to not waste redraw time when posns don't change;
        #  just re correctness, we could do it at a later stage)
        return 0.0
    # now we know the posns are different, and we have the old ones to compare them to.
    posns = self.norm_project_posns( posns) # this might modify posns object, and might return same or different object
    quats = self._initial_quats
    if quats is None:
        # precompute a quat to rotate new posns into a standard coord system for comparison to old ones
        # (Q args must be orthonormal and right-handed)
        oldposns = + self._initial_posns # don't modify those stored initial posns
            # (though it probably wouldn't matter if we did -- from now on,
            #  they are only compared to None and checked for length, as of 050518)
        oldposns = self.norm_project_posns( oldposns)
        axis = self.axis
        quats = self._initial_quats = [ Q(axis,pos1,cross(axis,pos1)) for pos1 in oldposns ]
    angs = []
    # (the loop reads self._initial_quats, which is the same list as the local quats at this point)
    for qq, pos2 in zip( self._initial_quats, posns):
        npos2 = qq.unrot(pos2)
        # now npos2 is in yz plane, and pos1 (if transformed) would just be the y axis in that plane;
        # just get its angle in that plane (defined so that if pos2 = pos1, ie npos2 = (0,1,0), then angle is 0)
        ang = angle(npos2[1], npos2[2]) # in degrees
        angs.append(ang)
    # now average these angles, paying attention to their being on a circle
    # (which means the average of 1 and 359 is 0, not 180!)
    angs.sort()
        # Warning: this sort is only correct since we know they're in the range [0,360] (inclusive range is ok).
        # It might be correct for any range that covers the circle exactly once, e.g. [-180,180]
        # (not fully analyzed for that), but it would definitely be wrong for e.g. [-0.001, 360.001]!
        # So be careful if you change how angle() works.
    angs = A(angs)
    # gaps[j] is the gap just before sorted angle j; gaps[0] is the wrap-around gap from last to first
    gaps = angs[1:] - angs[:-1]
    gaps = [angs[0] - angs[-1] + 360] + list(gaps)
    i = argmax(gaps)
    ##e Someday we should check whether this largest gap is large enough for this to make sense (>>180);
    # we are treating the angles as "clustered together in the part of the circle other than this gap"
    # and averaging them within that cluster. It would also make sense to discard outliers,
    # but doing this without jittering the rotation angle (as individual points become closer
    # to being outliers) would be challenging. Maybe better to just give up unless gap is, say, >>340.
    ##e Before any of that, just get the sim to do this in a better way -- interpret the complete set of
    # atom motions as approximating some overall translation and rotation, and tell us this, so we can show
    # not only rotation, but axis wobble and misalignment, and so these can be plotted.
    angs = list(angs)
    angs = angs[i:] + angs[:i] # start with the one just after the largest gap
    relang0 = angs[0]
    angs = A(angs) - relang0 # be relative to that, when we average them
    # but let them all be in the range [0,360)!
    angs = (angs + 720) % 360
        # We need to add 720 since Numeric's mod produces negative outputs
        # for negative inputs (unlike Python's native mod, which is correct)!
        # How amazingly ridiculous.
    ang = (sum(angs) / len(angs)) + relang0
    ang = ang % 360 # this is Python mod, so it's safe
    return ang
def parse_result(self):
    """
    Extract some information about the profile as well as the
    match state emission scores from the result file self.f_out.
    Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file does not exist or
                       T.fileLength() reports less than 10 for it
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out)

    if T.fileLength(self.f_out) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out)

    def _first(pattern):
        # whitespace-separated tokens of the first match of pattern
        return re.findall(pattern, out)[0].split()

    profileDic = {}

    ## read result; the handle is closed even if reading fails
    hmm = open(self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int(_first(r'LENG\s+[0-9]+')[1])
    profileDic['accession'] = _first(r'ACC\s+PF[0-9]+')[1]
    profileDic['NrSeq'] = int(_first(r'NSEQ\s+[0-9]+')[1])
    profileDic['AA'] = _first('HMM[ ]+' + '[A-Y][ ]+' * 20)[1:]

    ## collect null emission scores
    nullEmm = [float(j)
               for j in _first('NULE[ ]+' + '[-0-9]+[ ]+' * 20)[1:]]

    ## get emission scores: one row per profile position, made of the
    ## position number followed by 20 scores
    prob = []
    for i in range(1, profileDic['profLength'] + 1):
        pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
        prob.append([float(j) for j in _first(pattern)])

    profileDic['seqNr'] = N.transpose(N.take(prob, (0, ), 1))
    profileDic['emmScore'] = N.array(prob)[:, 1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    ent = [N.resize(self.entropy(e, nullProb), (1, 20))[0]
           for e in emmProb]
    profileDic['ent'] = N.array(ent)

    ###### TEST #####

    proba = N.array(prob)[:, 1:]

    # test set all to N.sum( abs( probabilities ) )
    profileDic['absSum'] = [N.resize(N.sum(N.absolute(row)), N.shape(row))
                            for row in proba]

    # set all to normalized max score
    # NOTE(review): math.SD is not part of the standard math module, so
    # `math` presumably is a project module here -- confirm.
    p4 = []
    for row in proba:
        p_scale = (row - N.average(row)) / math.SD(row)
        p4.append(N.resize(p_scale[N.argmax(N.array(p_scale))],
                           N.shape(row)))
    profileDic['maxAllScale'] = p4

    return profileDic