def mad(m,axis=0):
    """Return the Median Absolute Deviation of an array along one axis.

    m    -- array-like input (converted with Numeric.asarray)
    axis -- axis along which the deviation is taken (default 0)
    """
    data = Numeric.asarray(m)
    center = Numeric.asarray(median(data, axis), Numeric.Float)
    # Bring `axis` to the front and keep the other axes in their original
    # order.  Do not use swapaxes for this:
    # (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
    order = [axis] + [i for i in range(Numeric.rank(data)) if i != axis]
    moved = Numeric.transpose(data, order)
    # presumably MLab.median reduces along the first axis, which is why the
    # requested axis was moved to the front -- confirm against MLab
    return MLab.median(Numeric.absolute(moved - center))
def _checkOrth(self, T, TT, eps=0.0001, output=False):
    """Check if the basis is orthogonal on a set of points x.

    The basis counts as orthogonal when TT == T*transpose(T) == c*Identity,
    i.e. when the absolute values of all off-diagonal elements of TT sum up
    to less than eps.

    INPUT:
        T: matrix of values of polynomials calculated at common reference points (x)
        TT: T * transpose(T)
        eps: max numeric error
        output: if true, print a diagnostic report and return the verdict
            instead of raising on failure

    RETURNS:
        True if the basis is orthogonal; False if it is not orthogonal and
        output is true.

    RAISES:
        ValueError carrying the diagnostic report if the basis is not
        orthogonal and output is false.  (The report used to be raised as a
        plain string, which Python >= 2.6 rejects with a TypeError.)
    """
    # (1 - Identity) is 0 on the main diagonal and 1 elsewhere, hence
    # TTd0 = TT with 0s on the main diagonal
    TTd0 = (-1.*Numeric.identity(Numeric.shape(TT)[0])+1) * TT
    s = Numeric.sum(Numeric.sum(Numeric.absolute(TTd0)))
    minT = MLab.min(MLab.min(T))
    maxT = MLab.max(MLab.max(T))
    minTTd0 = MLab.min(MLab.min(TTd0))
    maxTTd0 = MLab.max(MLab.max(TTd0))
    # "not s < eps" (rather than "s >= eps") so that a NaN sum is reported
    # as a failure
    failed = not s < eps
    if not failed and not output:
        return True
    if failed:
        out = "NOT ORTHOG, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
    else:
        out = "ORTHOGONAL, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
    out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (minTTd0, maxTTd0, s)
    if not output:
        # only reachable for a failed check
        raise ValueError(out)
    print(out)
    if failed:
        return False
    return True
def function(self,params):
    """
    Hyperbolic function.

    Reads 'aspect_ratio', 'thickness', 'smoothing' and 'size' from params
    and evaluates the pattern on self.pattern_x / self.pattern_y.
    """
    aspect_ratio = params['aspect_ratio']
    x = self.pattern_x/aspect_ratio
    y = self.pattern_y
    thickness = params['thickness']
    sigma = params['smoothing']
    size = params['size']

    # Fold sqrt(|x^2 - y^2|) into one period of length `size` and measure
    # the distance to the nearer end of that period.
    within_period = fmod(sqrt(absolute(x**2 - y**2)),size)
    to_middle = minimum(within_period,size - within_period)
    offset = to_middle - thickness / 2.0

    # 1.0 where the offset is negative, 0.0 elsewhere
    solid = 1.0 - greater_equal(offset,0.0)

    two_sigmasq = 2.0*(sigma*sigma)
    # float errors (e.g. division by zero when smoothing is 0) are ignored
    # while the Gaussian fall-off is computed
    with float_error_ignore():
        falloff = exp(divide(-offset*offset, two_sigmasq))

    return maximum(falloff, solid)
def _checkOrth(self, T, TT, eps=0.0001, output=False):
    """Check if the basis is orthogonal on a set of points x.

    The basis counts as orthogonal when TT == T*transpose(T) == c*Identity,
    i.e. when the absolute values of all off-diagonal elements of TT sum up
    to less than eps.

    INPUT:
        T: matrix of values of polynomials calculated at common reference points (x)
        TT: T * transpose(T)
        eps: max numeric error
        output: if true, print a diagnostic report and return the verdict
            instead of raising on failure

    RETURNS:
        True if the basis is orthogonal; False if it is not orthogonal and
        output is true.

    RAISES:
        ValueError carrying the diagnostic report if the basis is not
        orthogonal and output is false.  (The report used to be raised as a
        plain string, which Python >= 2.6 rejects with a TypeError.)
    """
    # (1 - Identity) is 0 on the main diagonal and 1 elsewhere, hence
    # TTd0 = TT with 0s on the main diagonal
    TTd0 = (-1. * Numeric.identity(Numeric.shape(TT)[0]) + 1) * TT
    s = Numeric.sum(Numeric.sum(Numeric.absolute(TTd0)))
    minT = MLab.min(MLab.min(T))
    maxT = MLab.max(MLab.max(T))
    minTTd0 = MLab.min(MLab.min(TTd0))
    maxTTd0 = MLab.max(MLab.max(TTd0))
    # "not s < eps" (rather than "s >= eps") so that a NaN sum is reported
    # as a failure
    failed = not s < eps
    if not failed and not output:
        return True
    if failed:
        out = "NOT ORTHOG, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
    else:
        out = "ORTHOGONAL, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
    out += " min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (
        minTTd0, maxTTd0, s)
    if not output:
        # only reachable for a failed check
        raise ValueError(out)
    print(out)
    if failed:
        return False
    return True
def usr_similarity(x, y):
    """
    Similarity of two usr_descriptor vectors, x and y.

    The score is n / (n + d), where n is the length of x and d is the
    Manhattan distance between x and y; identical vectors score 1.0.
    """
    first = N.array(x)
    size = float(first.shape[0])
    second = N.array(y)

    # normalized and monotonically inverted Manhattan distance
    manhattan = N.add.reduce(N.absolute(first - second))
    return size / (size + manhattan)
def coef_maxCut(self, appxCoef):
    """Zero every coefficient that follows the abs. max. coefficient of its row.

    The first coefficient (column 0) is excluded from finding the max.
    Accepts a 2d matrix of coefficients where rows represent different
    curves and returns a matrix of the same shape.
    """
    assert len(appxCoef.shape) == 2
    nCoef = Numeric.shape(appxCoef)[1]

    # per row: column of the abs. max. coefficient; the search skips
    # column 0, so +1 converts back to an index into the full row
    tail = Numeric.absolute(appxCoef[:,1:])
    cutAt = Numeric.argmax(tail, 1) + 1

    # row i of keepUpTo is true for columns 0..i (lower triangle incl. diagonal)
    def notBeyond(i, j):
        return i >= j
    keepUpTo = Numeric.fromfunction(notBeyond, (nCoef, nCoef))

    # one selector row per curve, picked by that curve's cut position
    selector = Numeric.take(keepUpTo, cutAt, 0)
    return appxCoef*selector
def coef_maxCut(self, appxCoef):
    """Zero every coefficient that follows the abs. max. coefficient of its row.

    The first coefficient (column 0) is excluded from finding the max.
    Accepts a 2d matrix of coefficients where rows represent different
    curves and returns a matrix of the same shape.
    """
    assert len(appxCoef.shape) == 2
    width = Numeric.shape(appxCoef)[1]

    # row i of lowerTriangle is true for columns 0..i
    def onOrBelowDiagonal(i, j):
        return i >= j
    lowerTriangle = Numeric.fromfunction(onOrBelowDiagonal, (width, width))

    # per row: column of the abs. max. coefficient; column 0 is skipped in
    # the search, so +1 converts back to an index into the full row
    lastKept = Numeric.argmax(Numeric.absolute(appxCoef[:, 1:]), 1) + 1

    # one selector row per curve, picked by that curve's cut position
    return appxCoef * Numeric.take(lowerTriangle, lastKept, 0)
def radial(x, y, wide, gaussian_width):
    """
    Radial grating - A sector of a circle with Gaussian fall-off.

    x, y           -- coordinates at which the pattern is evaluated
    wide           -- angular width of the sector, in radians
    gaussian_width -- sigma of the Gaussian fall-off outside the sector

    Returns 1.0 inside the sector and the Gaussian fall-off of the angular
    distance from the sector edge elsewhere.
    """
    # unsigned angle to the positive x axis
    angle = absolute(arctan2(y,x))
    # 2.0 forces true division: under Python 2 an integer `wide` would
    # otherwise be floored (e.g. wide=1 would give a half width of 0)
    half_wide = wide/2.0

    # 1.0 where the angle lies inside the sector, 0.0 elsewhere
    radius = 1.0 - greater_equal(angle,half_wide)

    distance = angle - half_wide
    sigmasq = gaussian_width*gaussian_width

    # float errors (e.g. division by zero when gaussian_width is 0) are
    # ignored while the fall-off is computed
    with float_error_ignore():
        falloff = exp(divide(-distance*distance, 2.0*sigmasq))

    return maximum(radius,falloff)
def hyperbola(x, y, thickness, gaussian_width, axis):
    """
    Two conjugate hyperbolas with Gaussian fall-off which share the same
    asymptotes.

    abs(x^2/a^2 - y^2/b^2) = 1

    As a = b = axis, these hyperbolas are rectangular.
    """
    separation = absolute(x**2 - y**2)

    # 1.0 where neither or both of the two comparisons hold, 0.0 where
    # exactly one of them holds
    below_inner = greater_equal(axis**2,separation)
    beyond_outer = greater_equal(separation,(axis + thickness)**2)
    band = 1.0 - bitwise_xor(below_inner,beyond_outer)

    # distances to the inner (axis) and outer (axis + thickness) edges
    root = sqrt(separation)
    inner_offset = root - axis
    outer_offset = root - axis - thickness

    two_sigmasq = 2.0*(gaussian_width*gaussian_width)
    # float errors (e.g. division by zero when gaussian_width is 0) are
    # ignored while the fall-offs are computed
    with float_error_ignore():
        inner_falloff = exp(divide(-inner_offset*inner_offset, two_sigmasq))
        outer_falloff = exp(divide(-outer_offset*outer_offset, two_sigmasq))

    return maximum(band,maximum(inner_falloff,outer_falloff))
def function(self,params):
    """
    Radial function.

    Reads 'aspect_ratio', 'smoothing' and 'arc_length' from params and
    evaluates a sector of angular width arc_length (with Gaussian fall-off
    of width smoothing) on self.pattern_x / self.pattern_y.
    """
    aspect_ratio = params['aspect_ratio']
    x = self.pattern_x/aspect_ratio
    y = self.pattern_y
    gaussian_width = params['smoothing']

    # unsigned angle to the positive x axis
    angle = absolute(arctan2(y,x))
    # 2.0 forces true division: under Python 2 an integer arc_length would
    # otherwise be floored
    half_length = params['arc_length']/2.0

    # 1.0 where the angle lies inside the sector, 0.0 elsewhere
    radius = 1.0 - greater_equal(angle,half_length)

    distance = angle - half_length
    sigmasq = gaussian_width*gaussian_width

    # float errors (e.g. division by zero when smoothing is 0) are ignored
    # while the fall-off is computed
    with float_error_ignore():
        falloff = exp(divide(-distance*distance, 2.0*sigmasq))

    return maximum(radius, falloff)
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and it > 0, otherwise an all-zero mask is returned together
    with the median and standard deviation of all values.

    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int

    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert (len(a) > 0)
    out = N.zeros(len(a))

    ## too few values, or no iteration requested: nothing to refine
    if len(a) < 3 or it < 1:
        return out, N.median(a), N.std(a)

    values = N.array(a)

    for i in range(it):
        ## statistics over the values not (yet) marked as outliers
        b = N.compress(N.logical_not(out), a)
        me = N.median(b)
        sd = N.std(b)

        ## all remaining values are identical: the z-score is undefined
        ## (division by zero) and the mask cannot be refined any further
        if sd == 0:
            return out, me, sd

        bz = N.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and it > 0, otherwise an all-zero mask is returned together
    with the median and standard deviation of all values.

    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int

    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert( len(a) > 0 )
    out = N.zeros( len(a) )

    ## too few values, or no iteration requested: nothing to refine
    if len(a) < 3 or it < 1:
        return out, N.median(a), N.std(a)

    values = N.array( a )

    for i in range( it ):
        ## statistics over the values not (yet) marked as outliers
        b = N.compress( N.logical_not(out), a )
        me = N.median( b )
        sd = N.std( b )

        ## all remaining values are identical: the z-score is undefined
        ## (division by zero) and the mask cannot be refined any further
        if sd == 0:
            return out, me, sd

        bz = N.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def parse_result(self):
    """
    Extract some information about the profile as well as the
    match state emission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file self.f_out does not exist or
                       T.fileLength reports less than 10 for it
    @raise IndexError: if one of the expected records is not found in
                       the result file
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out)

    if T.fileLength(self.f_out) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out)

    ## read result; try/finally releases the handle even if read() fails
    hmm = open(self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    def fields(pattern):
        ## whitespace-separated tokens of the first match of pattern
        return re.findall(pattern, out)[0].split()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int(fields(r'LENG\s+[0-9]+')[1])
    profileDic['accession'] = fields(r'ACC\s+PF[0-9]+')[1]
    profileDic['NrSeq'] = int(fields(r'NSEQ\s+[0-9]+')[1])
    profileDic['AA'] = fields('HMM[ ]+' + '[A-Y][ ]+'*20)[1:]

    ## collect null emission scores
    nullEmm = [float(j) for j in fields('NULE[ ]+' + '[-0-9]+[ ]+'*20)[1:]]

    ## get emission scores: one row per profile position, made of the
    ## position number followed by 20 scores
    prob = []
    for i in range(1, profileDic['profLength'] + 1):
        pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
        prob.append([float(j) for j in fields(pattern)])

    profileDic['seqNr'] = N.transpose(N.take(prob, (0, ), 1))
    profileDic['emmScore'] = N.array(prob)[:, 1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    ent = [N.resize(self.entropy(e, nullProb), (1, 20))[0]
           for e in emmProb]
    profileDic['ent'] = N.array(ent)

    ###### TEST #####

    proba = N.array(prob)[:, 1:]

##         # test set all to max score
##         p = proba
##         p1 = []
##         for i in range( len(p) ):
##             p1 += [ N.resize( p[i][N.argmax( N.array( p[i] ) )] , N.shape( p[i] ) ) ]
##         profileDic['maxAll'] = p1

    # test set all to N.sum( abs( probabilities ) )
    profileDic['absSum'] = [N.resize(N.sum(N.absolute(row)), N.shape(row))
                            for row in proba]

    # set all to normalized max score
    p4 = []
    for row in proba:
        p_scale = (row - N.average(row)) / math.SD(row)
        p4.append(N.resize(p_scale[N.argmax(N.array(p_scale))],
                           N.shape(row)))
    profileDic['maxAllScale'] = p4

    return profileDic
def EulerImplicit(parent, yInit, odeSettings, yMin=None, yMax=None, yScale=None):
    """Integrate with Euler implicit

    Steps y from odeSettings.init to odeSettings.end.  Each step starts from
    an explicit Euler guess and then iterates the implicit equation with a
    Newton-type update that uses a finite-difference Jacobian.  The step
    size is quartered whenever the implicit iteration fails and is allowed
    to grow again (by a factor of 4, never beyond the configured step)
    after a successful step.

    parent -- object providing CalculateDerivatives, StepToBoundaries,
              InfoMessage and GetPath; RoundValues is used if present
    yInit -- initial values of the dependent variables
    odeSettings -- object with attributes step, end and init; maxIter
                   (default 15) and tolerance (default 1.0E-3) are optional
    yMin, yMax -- bounds used to clip the iterates
    yScale -- scaling of the residuals; if None it is recomputed at every
              step as max(1, abs(y))

    Returns True if the end of the range was reached, False if
    LARGEVALITER steps were taken before reaching it.
    Raises SimError('StepSizeTooSmall') if the step size shrinks to hMin or
    below, or if no step size converged within the allowed tries.
    """
    MAXTRY = 40
    LARGEVALITER = 1000

    #Settings for implicit iteration
    #Default are fairly loose settings
    if hasattr(odeSettings, 'maxIter'):
        maxIter = odeSettings.maxIter
    else:
        maxIter = 15
    if hasattr(odeSettings, 'dampingFactor'):
        damping = odeSettings.dampingFactor
    else:
        damping = 1.0
    # NOTE(review): this assignment discards odeSettings.dampingFactor read
    # just above -- confirm whether the override is intentional
    damping = 1.0
    if hasattr(odeSettings, 'tolerance'):
        tol = odeSettings.tolerance
    else:
        tol = 1.0E-3

    h = odeSettings.step
    xEnd = odeSettings.end
    xInit = odeSettings.init
    CalcDerivativesMethod = parent.CalculateDerivatives
    # Validate is looked up but its only use below is commented out
    Validate = None
    if hasattr(parent, 'ValidateStepResults'):
        Validate = parent.ValidateStepResults
    nuEquations = len(yInit)
    jacobian = zeros((nuEquations, nuEquations), Float)

    #Did scale values came in?
    autoScale = False
    if yScale == None:
        autoScale = True

    #Iterate along the whole distance
    h = Numeric.sign(xEnd-xInit) * abs(h) #Make sure the sign of h makes sense
    hBase = h #Original value of h
    hNext = h #Initialize hNext as h
    hMin = TINYESTVALUE #Always positive
    x = xInit
    y = array(yInit)
    stepCnt = 0
    converged = False
    loadResults = True
    path = parent.GetPath()
    xNextStore = xInit
    # keep stepping while x has not reached xEnd (direction independent)
    while stepCnt < LARGEVALITER and ((x-xEnd)*(xEnd-xInit) < 0.0):
        stepCnt += 1
        #Calculate derivatives right where we are
        parent.InfoMessage('CalculatingStep', (stepCnt, path, x, xInit, xEnd))
        # results are flagged for loading once per hBase interval, even if
        # the actual step had to be reduced
        if ( hBase > 0.0 and x >= xNextStore ) or ( hBase < 0.0 and x <= xNextStore ):
            loadResults = True
            xNextStore += hBase
        else:
            loadResults = False
        if yMin != None and min(y-yMin) < 0.0:
            #See if it is a round off problem.
            if hasattr(parent, 'RoundValues'):
                y = parent.RoundValues(y, yMin, yMax, yScale)
        dy_dx = CalcDerivativesMethod(x, y, loadResults)

        #Set h as the estimated next h
        h = hNext
        #Make sure it won't go over
        if (x + h - xEnd) * (x + h - xInit) > 0.0:
            h = xEnd - x

        #Scale values
        if autoScale:
            C = ones(len(y), Float)
            yScale = Numeric.maximum(C, absolute(y))

        #Iterate until a proper step size is found
        innerCnt = 0
        ySave = array(y, Float)
        #The implicit algorithm implies that the derivatives are evaluated at h
        #but sometimes it is necessary to evaluate the derivatives at h-epsilon
        #to avoid convergence problems.
        #In this case hEval = h-epsilon
        hEval = h
        # doImplicit is assigned but not read anywhere in this function
        doImplicit = 1
        while innerCnt <= MAXTRY:
            innerCnt += 1
            try:
                #Initial guess of yNext as explicit Euler
                yNext = ySave + h*dy_dx
                #Make sure it is
                yNext = parent.StepToBoundaries(x+h, yNext, h)
                # NOTE(review): yMin/yMax default to None but are passed to
                # clip unconditionally -- confirm callers always supply them
                yNext = Numeric.clip(yNext, yMin, yMax)

                #Iterate in the implicit step with quasi Newton Raphson
                #with approximate Jacobian
                iter = 0
                convImplicit = False
                while iter < maxIter:
                    iter += 1
                    yNextNew = ySave + h*CalcDerivativesMethod(x + hEval, yNext)
                    yNextNew = parent.StepToBoundaries(x+h, yNextNew, h)
                    #if Validate: yNextNew = Validate(x+h, yNextNew)
                    # scaled residual of the fixed point equation
                    rhs = yNextNew - yNext
                    rhs /= yScale
                    if max(Numeric.absolute(rhs)) < tol:
                        convImplicit = True
                        y = Numeric.clip(yNextNew, yMin, yMax)
                        break
                    #Calculate Jacobian with crude differentials
                    yNextForJac = array(yNext, Float)
                    rhsForJac = array(rhs, Float)
                    shift = 0.0001
                    shift = shift * yScale
                    for j in xrange(nuEquations):
                        # perturb one variable at a time, restore it afterwards
                        old = yNextForJac[j]
                        yNextForJac[j] = yNextForJac[j] + shift[j]
                        yNextNewForJac = ySave + h*CalcDerivativesMethod(x+hEval, yNextForJac)
                        rhsForJac = (yNextNewForJac - yNextForJac) / yScale
                        for k in xrange(nuEquations):
                            jacobian[k][j] = (rhsForJac[k] - rhs[k])/(shift[j])
                        yNextForJac[j] = old
                    #Invert Jacobian and get the new estimate for y
                    # the name jacobian holds the inverse from here on; every
                    # entry is rewritten by the loops above on the next pass
                    jacobian = inverse(jacobian)
                    deltaX = -dot(jacobian, rhs)*damping
                    yNext = UpdateX(yNext, deltaX)
                    yNext = parent.StepToBoundaries(x+h, yNext, h)
                    yNext = Numeric.clip(yNext, yMin, yMax)

                if convImplicit:
                    # let the next step grow, but never beyond the base step
                    if h >= 0.0:
                        hNext = min(hBase, h*4.0)
                    else:
                        hNext = max(hBase, h*4.0)
                    # NOTE(review): this replaces the clipped yNextNew stored
                    # in y on convergence with the previous iterate yNext --
                    # confirm which of the two is intended
                    y = yNext
                    break
                else:
                    #Reduce the step
                    h *= 0.25
                    if abs(h) <= hMin:
                        raise SimError('StepSizeTooSmall', (path, h))
            except:
                # any exception raised in the try block (including the
                # SimError raised just above) lands here and is treated as a
                # failed attempt
                #Reduce the step
                h *= 0.25
                if abs(h) <= hMin:
                    raise SimError('StepSizeTooSmall', (path, h))

        if not convImplicit:
            raise SimError('StepSizeTooSmall', (path, h))
        x += h

        ##Decide if we keep on iterating####################################################
        if ((x-xEnd)*(xEnd-xInit) >= 0.0):
            #Calculate derivatives yet again just so final results are loaded.
            #Not the best way to do things but good enough for now
            loadResults = True
            dy_dx = CalcDerivativesMethod(xEnd, y, loadResults)
            converged = True
            break
        ####################################################################################

    if not converged:
        parent.InfoMessage('ODEMaxSteps', (stepCnt, path))
    return converged
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file self.f_out does not exist or
                       T.fileLength reports less than 10 for it
    @raise IndexError: if one of the expected records is not found in
                       the result file
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )

    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    ## read result; try/finally releases the handle even if read() fails
    hmm = open( self.f_out, 'r')
    try:
        out = hmm.read()
    finally:
        hmm.close()

    def fields( pattern ):
        ## whitespace-separated tokens of the first match of pattern
        return re.findall( pattern, out )[0].split()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int( fields(r'LENG\s+[0-9]+')[1] )
    profileDic['accession'] = fields(r'ACC\s+PF[0-9]+')[1]
    profileDic['NrSeq'] = int( fields(r'NSEQ\s+[0-9]+')[1] )
    profileDic['AA'] = fields('HMM[ ]+' + '[A-Y][ ]+'*20)[1:]

    ## collect null emission scores
    nullEmm = [ float(j) for j in fields('NULE[ ]+' + '[-0-9]+[ ]+'*20)[1:] ]

    ## get emission scores: one row per profile position, made of the
    ## position number followed by 20 scores
    prob = []
    for i in range(1, profileDic['profLength']+1):
        pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
        prob.append( [ float(j) for j in fields(pattern) ] )

    profileDic['seqNr'] = N.transpose( N.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N.array(prob)[:,1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = [ N.resize( self.entropy(e, nullProb), (1,20) )[0]
            for e in emmProb ]
    profileDic['ent'] = N.array(ent)

    ###### TEST #####

    proba = N.array(prob)[:,1:]

##         # test set all to max score
##         p = proba
##         p1 = []
##         for i in range( len(p) ):
##             p1 += [ N.resize( p[i][N.argmax( N.array( p[i] ) )] , N.shape( p[i] ) ) ]
##         profileDic['maxAll'] = p1

    # test set all to N.sum( abs( probabilities ) )
    profileDic['absSum'] = [ N.resize( N.sum( N.absolute( row )), N.shape( row ) )
                             for row in proba ]

    # set all to normalized max score
    p4 = []
    for row in proba:
        p_scale = (row - N.average(row) )/ math.SD(row)
        p4.append( N.resize( p_scale[N.argmax( N.array(p_scale) )] ,
                             N.shape( row ) ) )
    profileDic['maxAllScale'] = p4

    return profileDic
def Thermal(self, model, case):
    """Compute thermal properties of both phases and store them in the case.

    Reads T, P, a, xf, yf, B, Zli, Zvi, Vli, Vvi, x, dadT, d2adT2 and
    FracVap from case.Prop, and RK_B, CP_A..CP_D, HV, TC, TB, DELHF and
    DELGF from model.

    Writes to case.Prop: Cp_v, Cv_v, H, H_l, H_v, HV, S, S_l, S_v, G, G_l,
    G_v, U, U_l, U_v, AFree, AFree_l, AFree_v, HF and GF.  The unsuffixed
    H, S, G, U and AFree are the vapor and liquid values weighted by
    FracVap and (1 - FracVap).  Also writes model["HV_T"].

    Returns nothing.
    """
    # Thermal Calcs
    T = case.Prop["T"]
    P = case.Prop["P"]
    a = case.Prop["a"]
    xf = case.Prop["xf"]
    yf = case.Prop["yf"]
    B = case.Prop["B"]
    Zli = case.Prop["Zli"]
    Zvi = case.Prop["Zvi"]
    V_li = case.Prop["Vli"]
    V_vi = case.Prop["Vvi"]
    x = case.Prop["x"]
    b = model["RK_B"]
    Cp, H0, S0 = self.Thermo.Calc(model["CP_A"], model["CP_B"], model["CP_C"], model["CP_D"], T, R)
    dadT = case.Prop["dadT"]
    d2adT2 = case.Prop["d2adT2"]

    # Liquid
    dA_L = self.dA(Zli, a, b, B, R, T)
    dS_L = self.dS(Zli, dadT, b, B, R, T)
    dH_L = self.dH(Zli, dA_L, dS_L, R, T)
    INTd2PdT2_L = self.INTd2PdT2(Zli, d2adT2, b, B)
    dCv_L = self.dCv(INTd2PdT2_L, R, T)
    dPdT_L = self.dPdT(dadT, b, R, T, V_li)
    dPdV_L = self.dPdV(a, b, R, T, V_li)
    # the liquid Cp/Cv terms are evaluated but not stored by this method
    dCp_L = self.dCp(dCv_L, dPdT_L, dPdV_L, T)

    # Vapor
    dA_V = self.dA(Zvi, a, b, B, R, T)
    dS_V = self.dS(Zvi, dadT, b, B, R, T)
    dH_V = self.dH(Zvi, dA_V, dS_V, R, T)
    INTd2PdT2_V = self.INTd2PdT2(Zvi, d2adT2, b, B)
    dCv_V = self.dCv(INTd2PdT2_V, R, T)
    dPdT_V = self.dPdT(dadT, b, R, T, V_vi)
    dPdV_V = self.dPdV(a, b, R, T, V_vi)
    dCp_V = self.dCp(dCv_V, dPdT_V, dPdV_V, T)

    # Mix
    # model["HV"] scaled to the temperature T (stored as model["HV_T"])
    HV_i = model["HV"] * power(absolute((T - model["TC"]) / (model["TB"] - model["TC"])), 0.38)
    model["HV_T"] = HV_i

    Ho_M_v = sum(yf * (H0 - dH_V))
    Ho_M_l = sum(xf * (H0 - dH_L))
    So_M_v = sum(yf * (S0 - dS_V)) - R * sum(yf * log(yf))
    So_M_l = sum(xf * (S0 - dS_L)) - R * sum(xf * log(xf))

    H_v = Ho_M_v
    H_l = Ho_M_l - sum(xf * HV_i)
    HV = H_v - H_l
    SV = HV / T
    S_v = So_M_v
    S_l = So_M_l - SV

    # Cp and Cv : Cp-Cv=R,Cv=Cp-R
    Cv_v = Cp - R
    case.Prop["Cp_v"] = Cp - dCp_V
    case.Prop["Cv_v"] = Cv_v - dCv_V

    frac_vap = case.Prop["FracVap"]
    frac_liq = 1 - frac_vap

    # Save result in the case: enthalpy
    case.Prop["H"] = H_v * frac_vap + H_l * frac_liq
    case.Prop["H_l"] = H_l
    case.Prop["H_v"] = H_v
    case.Prop["HV"] = HV

    # Save result in the case: entropy
    case.Prop["S"] = S_v * frac_vap + S_l * frac_liq
    case.Prop["S_l"] = S_l
    case.Prop["S_v"] = S_v

    # Save result in the case: Gibbs free energy
    G_l = H_l - T * S_l
    G_v = H_v - T * S_v
    case.Prop["G"] = G_v * frac_vap + G_l * frac_liq
    case.Prop["G_l"] = G_l
    case.Prop["G_v"] = G_v

    # Save result in the case: internal energy
    U_l = H_l - sum(xf * P * V_li)
    U_v = H_v - sum(yf * P * V_vi)
    case.Prop["U"] = U_v * frac_vap + U_l * frac_liq
    case.Prop["U_l"] = U_l
    case.Prop["U_v"] = U_v

    # Save result in the case: Helmholtz free energy
    A_l = U_l - T * S_l
    A_v = U_v - T * S_v
    # mix the A values here; mixing U_v/U_l would only duplicate
    # case.Prop["U"] and disagree with AFree_l / AFree_v
    case.Prop["AFree"] = A_v * frac_vap + A_l * frac_liq
    case.Prop["AFree_l"] = A_l
    case.Prop["AFree_v"] = A_v

    # Enthalpy and Gibbs energy of formation
    case.Prop["HF"] = sum(model["DELHF"] * x)
    case.Prop["GF"] = sum(model["DELGF"] * x)