def classifyNB(inputVec, p0Vect, p1Vect, pAbusive):
    p0 = sum(inputVec * p0Vect) + log(1 - pAbusive)
    p1 = sum(inputVec * p1Vect) + log(pAbusive)
    if p1 > p0:
        return 1
    else:
        return 0
def trainNB0(trainMatrix, trainCategory):
    """
    Naive Bayes classifier training function.
    :param trainMatrix: matrix of training input vectors
    :param trainCategory: vector of class labels, one per document
    :return: pAbusive: probability that a document is abusive;
             p0Vect: log probabilities of vocabulary words given the normal class;
             p1Vect: log probabilities of vocabulary words given the abusive class
    """
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # p0Num, p1Num = zeros(numWords), zeros(numWords)
    # p0Denom, p1Denom = 0.0, 0.0
    # To avoid zero probabilities, initialize every word count to 1 and
    # each denominator to 2 (Laplace smoothing).
    p0Num, p1Num = ones(numWords), ones(numWords)
    p0Denom, p1Denom = 2.0, 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # p1Vect = p1Num / p1Denom
    # p0Vect = p0Num / p0Denom
    # Take logs to guard against underflow: the product of many small
    # probabilities underflows to 0 in Python. Each element is divided
    # by the total word count of its class.
    p1Vect = log(p1Num / p1Denom)
    p0Vect = log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    p1 = sum(vec2Classify * p1Vec) + log(pClass1)
    p0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0
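# A minimal smoke test for the trainNB0/classifyNB pair above; the
# 4-word vocabulary, documents, and labels are invented for illustration,
# and the numpy imports mirror what the functions assume to be in scope.
from numpy import array, ones, log

trainMat = array([[1, 1, 0, 0],   # doc 0: normal
                  [0, 1, 1, 0],   # doc 1: abusive
                  [1, 0, 0, 1],   # doc 2: normal
                  [0, 0, 1, 1]])  # doc 3: abusive
labels = [0, 1, 0, 1]
p0V, p1V, pAb = trainNB0(trainMat, labels)
print(classifyNB(array([0, 0, 1, 1]), p0V, p1V, pAb))  # expected: 1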
def costFunction_Regular(theta, *args):
    print("now calling the regularized cost function")
    dataArr = asarray(args[0])   # args[0] holds the features, args[1] the labels
    labelArr = asarray(args[1])
    m, n = shape(dataArr)        # dataArr already includes the bias column
    theta = reshape(theta, (n, 1))
    hx = sigmodFunction(dataArr, theta)   # predicted class probabilities
    loghx = ma.log(hx)
    yhx = dot(loghx.transpose(), labelArr)
    log1_hx = ma.log(1 - hx)
    # args[2] is the regularization parameter lambda; theta[0, 0] ** 2 is
    # subtracted so the bias weight is not penalized.
    jtheta = (-yhx - dot(log1_hx.transpose(), (1 - labelArr))) * 1.0 / m + \
        args[2] * 1.0 / (2 * m) * (dot(theta.transpose(), theta) - theta[0, 0] ** 2)
    # gra = getGradient(dataArr, labelArr, theta, m)
    return jtheta.flatten()[0]
def trainNB0(trainMatrix, trainClassMatrix):
    '''
    Naive Bayes classifier training function.
    :param trainMatrix: training set matrix
    :param trainClassMatrix: matrix of class labels for the training set
    :return: p0Vect: a vector for class 0 in which each component is the
             fraction of all words across class-0 samples contributed by
             that word; p1Vect: the same for class 1; pAbusive: the
             fraction of class-1 samples in the training set
    '''
    # Number of samples in the training set
    numOfTrainDocs = len(trainMatrix)
    # Number of attributes in each word vector
    numOfWords = len(trainMatrix[0])
    pAbusive = sum(trainClassMatrix) / numOfTrainDocs
    p0Num = ones(numOfWords)
    p1Num = ones(numOfWords)
    # Denominators: total number of words the training set contains per class
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numOfTrainDocs):
        if trainClassMatrix[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # log here guards against underflow
    p0Vect = log(p0Num / p0Denom)
    p1Vect = log(p1Num / p1Denom)
    return p0Vect, p1Vect, pAbusive
def cost(self, x, y) -> np.ndarray:
    '''
    Calculate the cost of the output layer compared with the correct data.
    Arguments:
        x (numpy.ndarray): input data,
            Shape: (total input data, input layer's total_nodes + 1)
        y (numpy.ndarray): the correct output data,
            Shape: (total input data, output layer's total_nodes)
    Precondition: Input layer and output layer.
    '''
    prediction = self.predict(x)
    # Difference for y[i] = 1
    a = np.multiply(y, ma.log(prediction).filled(0))
    # Difference for y[i] = 0
    b = np.multiply(1 - y, ma.log(1 - prediction).filled(0))
    difference = a + b
    total_cost = -np.sum(difference, axis=0) / len(y)
    return np.sum(total_cost)
def predictNBO(p0Vec, p1Vec, pAbusive, inputVec):
    p1 = sum(inputVec * p1Vec) + log(pAbusive)
    p0 = sum(inputVec * p0Vec) + log(1.0 - pAbusive)
    if p1 > p0:
        return 1
    else:
        return 0
def find_an_approximation(self, function_table: dict) -> Function:
    try:
        SLNX = sum(log(x) for x in function_table.keys())
        SLNXX = sum(log(x) * log(x) for x in function_table.keys())
        SY = sum(function_table.values())
        SYLNX = sum(log(x) * y for x, y in function_table.items())
        n = len(function_table)
    except ValueError:
        return None
    try:
        a, b = self.solve_matrix22([[n, SLNX], [SLNX, SLNXX]], [SY, SYLNX])
        if a is None:
            return None
        fun = lambda x: a * log(x) + b
        s = sum((fun(x) - function_table[x]) ** 2 for x in function_table.keys())
        root_mean_square_deviation = sqrt(s / n)
        f = Function(fun, f'ф = {round(a, 3)}*ln(x) {round(b, 3):+}',
                     s, root_mean_square_deviation)
        self.print_approximation_table(function_table, f, self.function_type)
        return f
    except TypeError:
        return None
def average_in_flux(mag, dmag, axis=None):
    flux = 10 ** (mag / -2.5)
    dflux = np.log(10) / 2.5 * flux * dmag
    avg_dflux = np.power(np.sum(np.power(dflux, -2), axis), -0.5)
    avg_flux = np.sum(flux * np.power(dflux, -2), axis) * avg_dflux ** 2
    avg_mag = -2.5 * np.log10(avg_flux)
    avg_dmag = 2.5 / np.log(10) * np.divide(avg_dflux, avg_flux)
    return avg_mag, avg_dmag
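# Quick sanity check (hypothetical values): averaging two identical
# magnitudes should return the same magnitude with the uncertainty
# reduced by sqrt(2), i.e. roughly (20.0, 0.0707) here.
import numpy as np

mag = np.array([20.0, 20.0])
dmag = np.array([0.1, 0.1])
print(average_in_flux(mag, dmag))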
def entropy(array, dim=None):
    if dim is None:
        array = array.ravel()
        dim = 0
    n = ma.sum(array, dim)
    array = ma.log(array) * array
    total = ma.sum(array, dim)  # renamed from `sum` to avoid shadowing the builtin
    return (ma.log(n) - total / n) / ma.log(2.0)
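# Sanity check with made-up counts: four equally likely outcomes carry
# exactly 2 bits, since the function normalizes by n before taking log2.
import numpy as np

print(entropy(np.array([1, 1, 1, 1])))  # 2.0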
def computeCost(theta, X, y, lamda):
    m = np.shape(X)[0]
    hypo = sigmoid(X.dot(theta))
    term1 = log(hypo).dot(-y)
    term2 = log(1.0 - hypo).dot(1 - y)
    left_hand = (term1 - term2) / m
    right_hand = theta.transpose().dot(theta) * lamda / (2 * m)
    return left_hand + right_hand
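# A quick numeric check, assuming the conventional sigmoid (not shown in
# the snippet above); with theta = 0 every hypothesis is 0.5, so the
# regularized cost collapses to log(2) ~= 0.6931.
import numpy as np
from numpy import log

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

X = np.array([[1.0, 2.0], [1.0, -1.0]])  # first column is the bias term
y = np.array([1, 0])
print(computeCost(np.zeros(2), X, y, lamda=1.0))  # ~0.6931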
def log_linear_vinterp(T, P, levs):
    '''
    # Author Charles Doutriaux
    # Version 1.1
    # Expects a 2D field here so there's no reorder, which I suspect leaks memory
    # email: [email protected]
    # Converts a field from sigma levels to pressure levels using
    # log-linear interpolation.
    # Input
    # T    : temperature on sigma levels
    # P    : pressure field from TOP (level 0) to BOTTOM (last level)
    # levs : pressure levels to interpolate to (same units as P)
    # Output
    # t    : temperature on pressure levels (levs)
    '''
    import numpy.ma as MA
    sh = P.shape
    nsigma = sh[0]              # number of sigma levels
    try:
        nlev = len(levs)        # number of pressure levels
    except TypeError:
        nlev = 1                # len(levs) breaks if only one level is passed
    t = []
    for ilv in range(nlev):     # loop through pressure levels
        try:
            lev = levs[ilv]     # get value for the level
        except TypeError:
            lev = levs          # only one level passed
        Pabv = MA.ones(P[0].shape, float)  # was Numeric.Float in the legacy code
        Tabv = -Pabv            # temperature on the sigma level above
        Tbel = -Pabv            # temperature on the sigma level below
        Pbel = -Pabv            # pressure on the sigma level below
        Pabv = -Pabv            # pressure on the sigma level above
        for isg in range(1, nsigma):  # loop from the second sigma level to the last
            a = MA.greater(P[isg], lev)    # where the pressure is greater than lev
            b = MA.less(P[isg - 1], lev)   # where the pressure is less than lev
            # If lev falls between the two sigma levels, set Pabv/Pbel and Tabv/Tbel
            Pabv = MA.where(MA.logical_and(a, b), P[isg], Pabv)
            Tabv = MA.where(MA.logical_and(a, b), T[isg], Tabv)
            Pbel = MA.where(MA.logical_and(a, b), P[isg - 1], Pbel)
            Tbel = MA.where(MA.logical_and(a, b), T[isg - 1], Tbel)
        # Mask points with no data below lev, then interpolate
        tl = MA.masked_where(
            MA.equal(Pbel, -1.),
            MA.log(lev / MA.absolute(Pbel)) / MA.log(Pabv / Pbel) * (Tabv - Tbel) + Tbel)
        t.append(tl)            # add a level to the output
    return MA.asarray(t).astype('float32')
def KL_Measure(i, j):
    '''
    Compute a symmetrized KL divergence and map it to a similarity score.
    :return: 1 / (1 + e**D), where D is the symmetrized divergence
    '''
    KL1 = sum(i * (log(i / j).data))
    KL2 = sum(j * (log(j / i).data))
    D = (KL1 + KL2) / 2
    return 1 / (1 + math.e ** D)
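# Toy usage, assuming `log` is numpy.ma.log and `math` is in scope as the
# snippet expects; `.data` drops the mask so masked ratios do not poison
# the sum. Identical distributions give D = 0, hence a similarity of 0.5.
import math
import numpy as np
from numpy.ma import log

p = np.array([0.5, 0.5])
q = np.array([0.9, 0.1])
print(KL_Measure(p, p))  # 0.5 (identical inputs)
print(KL_Measure(p, q))  # ~0.39 (less similar)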
def muti_bin2(css, Nc, bs, nc):
    ncs_1 = ma.array(
        np.apply_along_axis(lambda x: np.bincount(x, minlength=Nc), 1,
                            css[:, bs == 1]))
    ncs_0 = nc - ncs_1
    N = len(bs)
    nb1 = bs.sum()
    nb0 = N - nb1
    return (1. / N * (ma.sum(ncs_1 * ma.log(1. * N * ncs_1 / nc / nb1), 1).filled(0) +
                      ma.sum(ncs_0 * ma.log(1. * N * ncs_0 / nc / nb0), 1).filled(0)))
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    # p(w|c1) * p(c1)
    # is sum(vec2Classify * p1Vec) the expectation of this sentence?
    p1 = sum(vec2Classify * p1Vec) + log(pClass1)
    # log(a*b) = log(a) + log(b)
    # p(w1, w2, w3, ...|c0) * p(c0), where the words are mutually
    # independent => p(w1|c0) * p(w2|c0) * p(w3|c0) * ..., then take the log
    p0 = sum(vec2Classify * p0Vec) + log(1 - pClass1)
    print("p0:%f, p1:%f" % (p0, p1))
    if p1 > p0:
        return 1
    else:
        return 0
def _compute_shannon_evenness_index(self, lct, footprint, lct_mask):
    """Compute Shannon's evenness index."""
    cover_types_list = self._get_cover_types(lct)
    numerator_sum = ma.masked_array(zeros(shape=lct.shape, dtype=float32),
                                    mask=lct_mask)
    for cover_type in cover_types_list:
        pi = self._compute_pct_cover_type_within_footprint(
            lct, cover_type, footprint, lct_mask)
        numerator_sum += pi * ma.filled(ma.log(pi), 0)
    m = self._count_covertypes_within_window(lct, cover_types_list, footprint)
    return ma.filled(-numerator_sum / ma.log(m), 0).astype(float32)
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant. See the docstring for press2alt for references.

    Input Arguments:
    * P: Pressure [hPa].
    * lapse_rate: -dT/dz [K/m] over the layer.
    * P_bott: Pressure [hPa] at the base of the layer.
    * T_bott: Temperature [K] at the base of the layer.
    * z_bott: Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array. All
    arguments that are MA arrays, however, are of the same size and
    shape. If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.
    """
    import numpy as N        # jfp: was import Numeric as N
    import numpy.ma as MA    # jfp: was import MA
    from atmconst import AtmConst

    const = AtmConst()
    if MA.size(lapse_rate) == 1:
        if MA.array(lapse_rate)[0] == 0.0:
            return ((-const.R_d * T_bott / const.g) * MA.log(P / P_bott)) + z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P / P_bott) ** exponent)) + z_bott
    else:
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P / P_bott) ** exponent)) + z_bott
        z_at_0 = ((-const.R_d * T_bott / const.g) * MA.log(P / P_bott)) + z_bott
        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))
        z_flat = MA.ravel(z)
        MA.put(z_flat, zero_lapse_mask_indices_flat,
               MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat))
        return MA.reshape(z_flat, z.shape)
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """
    Naive Bayes classification function.
    :param vec2Classify: vector to classify
    :param p0Vec: log word probabilities for class 0
    :param p1Vec: log word probabilities for class 1
    :param pClass1: prior probability of class 1
    :return: predicted class, 0 or 1
    """
    p1 = sum(vec2Classify * p1Vec) + log(pClass1)
    p0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0
def classifyNB(vecToClassify, p0Vec, p1Vec, pClass1):
    '''
    Classify with the trained model.
    :param vecToClassify: vector to classify
    :param p0Vec: log word probabilities for class 0
    :param p1Vec: log word probabilities for class 1
    :param pClass1: prior probability of class 1
    :return: predicted class, 0 or 1
    '''
    # Note: the original paired p1 with p0Vec and p0 with p1Vec, which
    # swaps the class scores; each score must use its own class vector.
    p1 = sum(vecToClassify * p1Vec) + log(pClass1)
    p0 = sum(vecToClassify * p0Vec) + log(1 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0
def computeNMI(CG, n_G, n_C):
    # CG should be a two-column np array: column 1 the cluster, column 2 the class
    pcg = zeros([n_G, n_C])
    for line in CG:
        pcg[line[0], line[1]] += 1  # column 1 is cluster and thus row
    pcg = pcg / pcg.sum()
    pc = pcg.sum(axis=0)
    pg = pcg.sum(axis=1)
    # construct pcpg, where pcpg[i, j] := 1 / (p(c=j) * p(g=i))
    pcpg = zeros([n_G, n_C])
    for i in range(n_G):
        for j in range(n_C):
            pcpg[i, j] = 1 / (pc[j] * pg[i])
    forLog0 = multiply(pcg, pcpg)
    forLog0 = ma.log(forLog0)   # masks the log of empty cells
    forLog = forLog0.filled(0)
    numerator = multiply(pcg, forLog).sum()
    denominatorM = -multiply(pc, log(pc)).sum() - multiply(pg, log(pg))
    NMI = numerator / denominatorM.sum()
    return NMI
def fit_hurdle_gamma_vector(data):
    nonzero = data != 0.0
    num_nonzero = np.sum(nonzero, axis=1, keepdims=True)
    num_zero = data.shape[1] - num_nonzero
    insufficient_data = (num_nonzero <= SUFFICIENT_DATA_POINTS).flatten()
    prob_zero = num_zero / data.shape[1]
    data = ma.array(data=data, mask=~nonzero)
    # Add a small amount of noise so that zeros in the data, or all
    # values being identical, do not cause issues.
    data += np.random.uniform(1e-8, 0.2, size=data.shape)
    data_mean = np.mean(data, axis=1)
    log_of_mean = np.log(data_mean)
    mean_of_logs = np.mean(ma.log(data), axis=1)
    log_diff = mean_of_logs - log_of_mean
    shape = 0.5 / (log_of_mean - mean_of_logs)
    shape_reciprocal = 1 / shape
    difference = 1
    while difference > 0.000005:
        numerator = log_diff + np.log(shape) - digamma(shape)
        denominator = (shape ** 2) * (shape_reciprocal - polygamma(1, shape))
        tmp_shape_reciprocal = shape_reciprocal + numerator / denominator
        tmp_shape = 1 / tmp_shape_reciprocal
        difference = np.max(np.abs(tmp_shape - shape))
        shape = tmp_shape
        shape_reciprocal = tmp_shape_reciprocal
    scale = data_mean / shape
    if np.any(np.isnan(shape)) or np.any(np.isnan(scale)):
        warn("NaN shape or scale value")
    return (shape.data, scale.data, prob_zero.flatten(), insufficient_data)
def gamma(self, mkt_dict_, engine_, unit_=None):
    """Calculate option GAMMA with market data and engine."""
    _rate, _spot, _vol, _div, _method, _param, _sign, _strike, _t = \
        self._prepare_risk_data(mkt_dict_, engine_)
    _unit = unit_ or self.unit
    if _method == EngineMethod.BS.value:
        _d1 = (log(_spot / _strike) + (_rate + _vol ** 2 / 2) * _t) / _vol / sqrt(_t)
        return exp(-_d1 ** 2 / 2) / sqrt(2 * pi) / _spot / _vol / sqrt(_t) * \
            exp(-_div * _t) * _unit
    elif _method == EngineMethod.MC.value:
        from utils.monte_carlo import MonteCarlo
        _iteration = self._check_iter(_param[EngineParam.MCIteration.value])
        _spot = MonteCarlo.stock_price(_iteration, isp=_spot, rate=_rate,
                                       div=_div, vol=_vol, t=_t)
        _step = 0.01
        # Second-order central difference of the payoff per simulated path
        _gamma = [((max(_sign * (_s + 2 * _step - _strike), 0) -
                    max(_sign * (_s - _strike), 0)) -
                   (max(_sign * (_s - _strike), 0) -
                    max(_sign * (_s - 2 * _step - _strike), 0))) / (4 * _step ** 2)
                  for _s in _spot]
        return average(_gamma) * exp(-_rate * _t) * _unit
def pv(self, mkt_dict_, engine_, unit_=None):
    """Calculate option PV with market data and engine."""
    _rate, _spot, _vol, _div, _method, _param, _sign, _strike, _t = \
        self._prepare_risk_data(mkt_dict_, engine_)
    _unit = unit_ or self.unit
    if _method == EngineMethod.BS.value:
        _d1 = (log(_spot / _strike) + (_rate - _div + _vol ** 2 / 2) * _t) / _vol / sqrt(_t)
        _d2 = _d1 - _vol * sqrt(_t)
        return _sign * (_spot * exp(-_div * _t) * norm.cdf(_sign * _d1) -
                        _strike * exp(-_rate * _t) * norm.cdf(_sign * _d2)) * _unit
    elif _method == EngineMethod.MC.value:
        from utils.monte_carlo import MonteCarlo
        _iteration = self._check_iter(_param[EngineParam.MCIteration.value])
        _spot = MonteCarlo.stock_price(_iteration, isp=_spot, rate=_rate,
                                       div=_div, vol=_vol, t=_t)
        _price = [max(_sign * (_s - _strike), 0) for _s in _spot]
        return average(_price) * exp(-_rate * _t) * _unit
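# A standalone sketch of the Black-Scholes branch above (sign = +1 for a
# call, -1 for a put) with the class plumbing stripped out; the inputs
# are illustrative only. An at-the-money call with S = K = 100, r = 5%,
# vol = 20%, t = 1y prices at about 10.45.
from math import exp, log, sqrt
from scipy.stats import norm

def bs_pv(sign, spot, strike, rate, div, vol, t):
    d1 = (log(spot / strike) + (rate - div + vol ** 2 / 2) * t) / (vol * sqrt(t))
    d2 = d1 - vol * sqrt(t)
    return sign * (spot * exp(-div * t) * norm.cdf(sign * d1) -
                   strike * exp(-rate * t) * norm.cdf(sign * d2))

print(bs_pv(1, 100.0, 100.0, 0.05, 0.0, 0.2, 1.0))  # ~10.45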
def F_ideal_int(g_s, density, multiplicities):
    rho_s = [density * g_i for g_i in g_s]
    rho_s_ma = [ma.masked_values(rho_i / density, 0) for rho_i in rho_s]
    log_s = [ma.log(rho_i_ma) for rho_i_ma in rho_s_ma]
    integral = sum([m * (rho_i * ma.filled(log_i, 0) - (rho_i - density))
                    for m, rho_i, log_i in zip(multiplicities, rho_s, log_s)])
    return np.sum(integral)
def preprocess(self, X, method):
    """
    Preprocess the data by scaling into the range of 0-1 with bins.
    """
    if method == "bucket":
        # scales into the 0-1 range with bins
        print("using the bucket prep method")
        from sklearn.preprocessing import KBinsDiscretizer
        est = KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="quantile")
        est.fit(X)
        X_processed = est.transform(X)
        X_processed /= 10  # transform from nominal values to 0-1
        return X_processed
    elif method == "clip":
        # clips the raw counts into a certain range
        print("using the clip prep method")
        cutoff = 1000
        X_processed = np.minimum(X, cutoff) + np.sqrt(np.maximum(X - cutoff, 0))
        return X_processed
    elif method == "log":
        # takes the log of the count
        print("using the log prep method")
        import numpy.ma as ma
        mask = ma.log(X)
        # mask logged data to replace NaN (log 0) with 0
        X_processed = ma.fix_invalid(mask, fill_value=0).data
        return X_processed
    else:
        raise Exception("Incorrect preprocess method name passed!")
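# The "log" branch in isolation: ma.log masks log(0), and fix_invalid
# replaces the masked entries with 0 instead of leaving -inf/NaN behind.
import numpy as np
import numpy.ma as ma

X = np.array([[0.0, 1.0, np.e]])
print(ma.fix_invalid(ma.log(X), fill_value=0).data)  # [[0. 0. 1.]]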
def F_derviative_slow(rho_s, rho0_s, u_s, zr_s, r, x, y, z, kBT, voxel, shape=None):
    """
    Derivative of the 3D-RISM functional:
    F'_3drism_i = kBT*np.log(g_i(r)) + u_i(r) - kBT*sum(z_ij * h_j)
    Returns an array of derivatives with the shape n_s * grid.
    """
    if np.any(shape):
        rho_s = np.reshape(rho_s, shape)
    if len(zr_s.shape) == 1:
        zr_s = zr_s.reshape((-1, 1))
    rho_s_ma = [ma.masked_values(rho_i / rho0_i, 0, atol=1.0e-15)
                for rho_i, rho0_i in zip(rho_s, rho0_s)]
    log_s = [ma.log(rho_i_ma) for rho_i_ma in rho_s_ma]
    deriv = []
    for i, (u_i, rho_i, log_i) in enumerate(zip(u_s, rho_s, log_s)):
        # we multiply u_i by kBT as well, since it is actually u_i*beta
        mean_f_part = kBT * (ma.filled(log_i, -34.53877639491068) + u_i)
        ex_part = 0
        for rho_j, rho0_j, zr in zip(rho_s, rho0_s, zr_s.T):
            delta_rho = rho_j - rho0_j
            # convol = ndimage.filters.convolve(delta_rho, z_3d_ij, mode='constant')
            convol = slow_convolution(delta_rho, zr, r, x, y, z, voxel)
            ex_part += convol
        deriv.append(mean_f_part + ex_part * kBT)
    return np.array(deriv)
def F_derviative(rho_s, rho0_s, u_s, z_3ds, kBT, voxel, args, shape=None):
    if np.any(shape):
        rho_s = np.reshape(rho_s, shape)
    rho_s_ma = [ma.masked_values(rho_i / rho0_i, 0, atol=1.0e-15)
                for rho_i, rho0_i in zip(rho_s, rho0_s)]
    log_s = [ma.log(rho_i_ma) for rho_i_ma in rho_s_ma]
    deriv = []
    for i, (log_i, u_i, rho_i, z_3d_i) in enumerate(zip(log_s, u_s, rho_s, z_3ds)):
        mean_f_part = kBT * (ma.filled(log_i, -34.53877639491068) + u_i)
        ex_part = 0
        for j, (rho_j, rho0_j, z_3d_ij) in enumerate(zip(rho_s, rho0_s, z_3ds[i, :])):
            delta_rho = rho_j - rho0_j
            delta_rho_k = np.fft.fftn(delta_rho)
            delta_rho_k = np.fft.fftshift(delta_rho_k)  # shift 0 freq to the middle
            convol = np.real(np.fft.ifftn(np.fft.ifftshift(delta_rho_k * z_3d_ij)))
            ex_part += kBT * convol / args.multiplicities[i]
        deriv_i = mean_f_part - ex_part
        # fix discontinuity
        deriv_i = np.where(deriv_i > 500, 500, deriv_i)
        deriv.append(deriv_i)
        print('median', np.median(deriv_i))
        print('avg', np.average(deriv_i))
    return np.array(deriv)  # - 0.122324502591
def F_ideal_int(rho_s, rho0_s):
    rho_s_ma = np.array([ma.masked_values(rho_i / rho0_i, 0, atol=1.0e-15)
                         for rho_i, rho0_i in zip(rho_s, rho0_s)])
    log_s = [ma.log(rho_i_ma) for rho_i_ma in rho_s_ma]
    integral = sum([rho_i * ma.filled(log_i, 0) - (rho_i - rho0_i)
                    for rho_i, rho0_i, log_i in zip(rho_s, rho0_s, log_s)])
    return np.sum(integral)
def __call__(self, value, clip=None):
    if clip is None:
        clip = self.clip
    if cbook.iterable(value):
        vtype = 'array'
        val = ma.asarray(value).astype(np.float)
    else:
        vtype = 'scalar'
        val = ma.array([value]).astype(np.float)
    self.autoscale_None(val)
    vmin, vmax = self.vmin, self.vmax
    if vmin > vmax:
        raise ValueError("minvalue must be less than or equal to maxvalue")
    elif vmin <= 0:
        raise ValueError("values must all be positive")
    elif vmin == vmax:
        return 0.0 * val
    else:
        if clip:
            mask = ma.getmask(val)
            val = ma.array(np.clip(val.filled(vmax), vmin, vmax), mask=mask)
        result = (ma.log(val) - np.log(vmin)) / (np.log(vmax) - np.log(vmin))
    if vtype == 'scalar':
        result = result[0]
    return result
def dewpoint(e):
    r'''Calculate the ambient dewpoint given the vapor pressure.

    Parameters
    ----------
    e : array_like
        Water vapor partial pressure in mb

    Returns
    -------
    array_like
        Dew point temperature in degrees Celsius.

    See Also
    --------
    dewpoint_rh, saturation_vapor_pressure, vapor_pressure

    Notes
    -----
    This function inverts the Bolton 1980 [3] formula for saturation
    vapor pressure to instead calculate the temperature. This yields the
    following formula for dewpoint in degrees Celsius:

    .. math:: T = \frac{243.5 \log(e / 6.112)}{17.67 - \log(e / 6.112)}

    References
    ----------
    .. [3] Bolton, D., 1980: The Computation of Equivalent Potential
       Temperature. Mon. Wea. Rev., 108, 1046-1053.
    '''
    val = log(e / sat_pressure_0c)
    return 243.5 * val / (17.67 - val)
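# Round-trip check: 6.112 mb is the Bolton saturation vapor pressure at
# 0 degC (the sat_pressure_0c constant the function assumes is in
# scope), so the dewpoint should come back as exactly 0.
from numpy import log

sat_pressure_0c = 6.112  # mb
print(dewpoint(6.112))   # 0.0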
def zonal_avg(data, Log=False):
    """
    Compute the zonal average of a field on the POP gx3v5 grid.
    The shape of the input data is expected to be either
    [nfoo, nlat, nlon] or [nlat, nlon]. Log=True computes the
    geometric average.
    Output: arrays zavg and lat
    """
    print('computing zonal average')
    # get lat and lon for the new regular grid
    # fpin = Nio.open_file('/home/ivan/Python/data/lat_t.nc', 'r')
    fpin = Nio.open_file('/home/emunoz/Python/mapping/model_grid/lat_t.nc', 'r')
    lat_t = fpin.variables['lat_t'][:]
    lat_t_edges = fpin.variables['lat_t_edges'][:]
    fpin.close()
    # fpin = Nio.open_file('/home/ivan/Python/data/gx3v5.nc', 'r')
    fpin = Nio.open_file('/home/emunoz/Python/mapping/model_grid/gx3v5.nc', 'r')
    lon_t = N.sort(fpin.variables['TLONG'][0, :])
    ulon = N.sort(fpin.variables['ULONG'][0, :])
    lon_t_edges = N.concatenate((ulon, ulon[0, N.newaxis] + 360.), 0)
    # get gx3v5 lat and lon
    tlon = fpin.variables['TLONG'][:]
    tlat = fpin.variables['TLAT'][:]
    fpin.close()
    # compute the area of cells in the new regular grid
    area = grid_area(lon_t_edges, lat_t_edges)
    nlat = lat_t.shape[0]
    nlon = lon_t.shape[0]
    if data.ndim == 3:
        new_data = MA.zeros((data.shape[0], nlat, nlon), dtype=float)
    elif data.ndim == 2:
        new_data = MA.zeros((nlat, nlon), dtype=float)
    else:
        print('Check field dimensions')
        sys.exit()
    # geometric mean?
    if Log:
        work = MA.log(data)
    else:
        work = data
    # remap data to the new regular grid
    for i in range(nlat):
        # print('lat = %.2f' % lat_t[i])
        for j in range(nlon):
            new_data[:, i, j] = extract_loc(lon_t[j], lat_t[i], tlon, tlat, work)
    # compute the zonal average
    if Log:
        za_data = MA.exp(MA.average(new_data, axis=-1,
                                    weights=N.resize(area, new_data.shape)))
    else:
        za_data = MA.average(new_data, axis=-1,
                             weights=N.resize(area, new_data.shape))
    return za_data, lat_t
def geometric_mean(array, axis=0):
    '''Return the geometric mean of an array, removing all zero-values
    but retaining total length.
    '''
    non_zero = ma.masked_values(array, 0)
    log_a = ma.log(non_zero)
    return ma.exp(log_a.mean(axis=axis))
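# Zeros are masked rather than zeroing the whole product: the geometric
# mean of [1, 0, 4] is taken over [1, 4] only, giving 2.0.
import numpy as np
import numpy.ma as ma

print(geometric_mean(np.array([1.0, 0.0, 4.0])))  # 2.0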
def test_testUfuncs1(self):
    # Test various functions such as sin, cos.
    (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
    assert_(eq(np.cos(x), cos(xm)))
    assert_(eq(np.cosh(x), cosh(xm)))
    assert_(eq(np.sin(x), sin(xm)))
    assert_(eq(np.sinh(x), sinh(xm)))
    assert_(eq(np.tan(x), tan(xm)))
    assert_(eq(np.tanh(x), tanh(xm)))
    with np.errstate(divide='ignore', invalid='ignore'):
        assert_(eq(np.sqrt(abs(x)), sqrt(xm)))
        assert_(eq(np.log(abs(x)), log(xm)))
        assert_(eq(np.log10(abs(x)), log10(xm)))
    assert_(eq(np.exp(x), exp(xm)))
    assert_(eq(np.arcsin(z), arcsin(zm)))
    assert_(eq(np.arccos(z), arccos(zm)))
    assert_(eq(np.arctan(z), arctan(zm)))
    assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
    assert_(eq(np.absolute(x), absolute(xm)))
    assert_(eq(np.equal(x, y), equal(xm, ym)))
    assert_(eq(np.not_equal(x, y), not_equal(xm, ym)))
    assert_(eq(np.less(x, y), less(xm, ym)))
    assert_(eq(np.greater(x, y), greater(xm, ym)))
    assert_(eq(np.less_equal(x, y), less_equal(xm, ym)))
    assert_(eq(np.greater_equal(x, y), greater_equal(xm, ym)))
    assert_(eq(np.conjugate(x), conjugate(xm)))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, ym))))
    assert_(eq(np.concatenate((x, y)), concatenate((x, y))))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, y))))
    assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
def index(request, *args, **kwargs):
    if request.method == 'POST':
        data = ''
        s = 'Invalid Input!'
        form = ArForm(request.POST)
        if form.is_valid():
            print("VALID DATA")
            data = form.cleaned_data['article']
        if len(data.strip()) < 20:
            return render(request, 'classifier/index.html', {"article": ""})
        dataW = fitWordArticle(data, vectorizer)
        dataG = createGrammarDictionary([data])
        dataF = np.concatenate((dataW[0], dataG[0]), axis=None)
        dataF = ma.log(dataF / dataF.sum() * 100 + 1).filled(0)
        outside = []
        outside.append(dataF)
        outside = np.array(outside)
        print("TESTER")
        print(outside.shape)
        ynew = clsfyModel.predict(outside)
        print(ynew)
        if ynew[0][0] > ynew[0][1]:
            s = "You are a Milton Paper Writer"
        else:
            s = "You are a Milton Measure Writer"
        return render(request, 'classifier/index.html', {"article": str(s)})
    else:
        return render(request, 'classifier/index.html', {"article": ""})
def transform_non_affine(self, a):
    sign = np.sign(a)
    masked = ma.masked_inside(a, -self.linthresh, self.linthresh, copy=False)
    log = sign * self.linthresh * (self._linscale_adj +
                                   ma.log(np.abs(masked) / self.linthresh) /
                                   self._log_base)
    if masked.mask.any():
        return ma.where(masked.mask, a * self._linscale_adj, log)
    else:
        return log
def transform(self, a):
    sign = np.sign(np.asarray(a))
    masked = ma.masked_inside(a, -self.linthresh, self.linthresh, copy=False)
    log = sign * ma.log(np.abs(masked)) / self._log_base
    if masked.mask.any():
        return np.asarray(ma.where(masked.mask, a * self._linadjust, log))
    else:
        return np.asarray(log)
def transform(self, a):
    sign = np.sign(a)
    masked = ma.masked_inside(a, -self.linthresh, self.linthresh, copy=False)
    log = sign * self.linthresh * (1 + ma.log(np.abs(masked) / self.linthresh))
    if masked.mask.any():
        return ma.where(masked.mask, a, log)
    else:
        return log
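# The same symlog mapping as a free function (linthresh = 1, natural
# log) to show the behavior: values inside [-1, 1] pass through
# unchanged, values outside are logarithmically compressed.
import numpy as np
import numpy.ma as ma

def symlog(a, linthresh=1.0):
    sign = np.sign(a)
    masked = ma.masked_inside(a, -linthresh, linthresh, copy=False)
    log = sign * linthresh * (1 + ma.log(np.abs(masked) / linthresh))
    return ma.where(masked.mask, a, log) if masked.mask.any() else log

print(symlog(np.array([-10.0, 0.5, 10.0])))  # [-3.302... 0.5 3.302...]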
def findEout(self, numSamples=1000):
    e_out = 0
    dataSamples = [self.createDataPoint() for _ in range(numSamples)]
    for x, y in dataSamples:
        e_out += log(1 + exp(-1 * multiply(y, np.dot(transpose(self.w), x))))
    e_out /= float(numSamples)
    return e_out
def transform(self, a):
    a = np.asarray(a)
    sign = np.sign(a)
    masked = ma.masked_inside(a, -self.linthresh, self.linthresh, copy=False)
    if masked.mask.any():
        log = sign * (ma.log(np.abs(masked)) / self._log_base + self._linadjust)
        return np.asarray(ma.where(masked.mask, a * self._linscale, log))
    else:
        return sign * (np.log(np.abs(a)) / self._log_base + self._linadjust)
def ln_shifted_auto(v):
    """If 'v' has values <= 0, it is shifted so that min(v) = 1 before
    taking the log. Otherwise the log is taken on the original 'v'."""
    vmin = ma.minimum(v)
    if vmin <= 0:
        values = v - vmin + 1
    else:
        values = v
    return ma.log(values)
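# Usage sketch: with non-positive values present, the data is shifted so
# min(v) = 1 before the log; strictly positive input is logged as-is.
# Note this assumes a numpy vintage where single-argument ma.minimum(v)
# still works; newer releases want ma.minimum.reduce(v) instead.
import numpy.ma as ma

print(ln_shifted_auto(ma.array([-1.0, 0.0, 1.0])))  # [0.0 0.693... 1.098...]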
def trainNB1(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    numWord = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    p0Num = ones(numWord)
    p1Num = ones(numWord)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    p0Vect = log(p0Num / p0Denom)
    p1Vect = log(p1Num / p1Denom)
    return p0Vect, p1Vect, pAbusive
def perform_tf_idf(self):
    words_per_doc = sum(self.a, axis=0)
    docs_per_word = sum(self.a, axis=1)
    rows, cols = self.a.shape
    for i in range(rows):
        for j in range(cols):
            if words_per_doc[j] != 0:
                self.a[i, j] = (self.a[i, j] / words_per_doc[j]) * \
                    log(float(cols) / docs_per_word[i])
                if isinf(self.a[i, j]):
                    print("Infinity found!")
def corr_proba(r, ndata, ndataset=2, dof=False):
    """Probability of rejecting correlations

    - **r**: Correlation coefficient
    - **ndata**: Number of records used for correlations
    - **ndataset**, optional: Number of datasets (1 for autocorrelations,
      else 2) [default: 2]

    .. todo:: This must be rewritten using :mod:`scipy.stats`
    """
    # Basic tests
    ndata = MA.masked_equal(ndata, 0, copy=0)
    r = MV2.masked_where(MA.equal(MA.absolute(r), 1.), r, copy=0)
    # Degrees of freedom
    if dof:
        df = ndata
    else:
        df = ndata - 2 - ndataset
    # Advanced test: prevent extreme values by locally decreasing the dof
    reduc = N.ones(r.shape)
    z = None
    while z is None or MA.count(MA.masked_greater(z, -600.)):
        if z is not None:
            imax = MA.argmin(z.ravel())
            reduc.flat[imax] += 1
        dfr = df / reduc
        t = r * MV2.sqrt(dfr / ((1.0 - r) * (1.0 + r)))
        a = 0.5 * dfr
        b = 0.5
        x = df / (dfr + t ** 2)
        z = _gammaln(a + b) - _gammaln(a) - _gammaln(b) + \
            a * MA.log(x) + b * MA.log(1.0 - x)
    # Perform the test and format the variable
    prob = MV2.masked_array(betai(a, b, x), axes=r.getAxisList()) * 100
    prob.id = 'corr_proba'
    prob.name = prob.id
    prob.long_name = 'Probability of rejection'
    prob.units = '%'
    return prob
def __call__(self, value, clip=None):
    if clip is None:
        clip = self.clip
    if cbook.iterable(value):
        vtype = 'array'
        val = ma.asarray(value).astype(np.float)
    else:
        vtype = 'scalar'
        val = ma.array([value]).astype(np.float)
    self.autoscale_None(val)
    vmin, vmax = self.vmin, self.vmax
    vin, cin = self.vin, self.cin
    if vmin > vmax:
        raise ValueError("minvalue must be less than or equal to maxvalue")
    elif vmin > 0:
        raise ValueError("minvalue must be less than 0")
    elif vmax < 0:
        raise ValueError("maxvalue must be greater than 0")
    elif vmin == vmax:
        result = 0.0 * val
    else:
        if clip:
            mask = ma.getmask(val)
            val = ma.array(np.clip(val.filled(vmax), vmin, vmax), mask=mask)
        ipos = (val > vin)
        ineg = (val < -vin)
        izero = ~(ipos | ineg)
        result = ma.empty_like(val)
        result[izero] = 0.5 + cin * val[izero] / vin
        result[ipos] = 0.5 + cin + (0.5 - cin) * \
            (ma.log(val[ipos]) - np.log(vin)) / (np.log(vmax) - np.log(vin))
        result[ineg] = 0.5 - cin - (0.5 - cin) * \
            (ma.log(-val[ineg]) - np.log(vin)) / (np.log(-vmin) - np.log(vin))
        result.mask = ma.getmask(val)
    if vtype == 'scalar':
        result = result[0]
    return result
def get_diff_jpdf_with_ini(A, P0, epsilon):
    """Get a joint distribution with the same marginal distributions as A
    whose cross entropy with A equals epsilon."""
    t, _ = A.shape
    obj_func = lambda x: dot(x, log(x))
    f_eqcons = lambda x: eq_cons(x, A, epsilon)
    out = fmin_slsqp(func=obj_func,
                     x0=P0.reshape(-1,),
                     f_eqcons=f_eqcons,
                     bounds=[[0, 1]] * t * t)
    return out.reshape(A.shape)
def main(tab_fname=None):
    varlist = []
    M = np.genfromtxt(name_iter(open(tab_fname), varlist),
                      usemask=True, delimiter='\t')
    Q = ma.MaskedArray(data=M.data, mask=(M.mask | (M.data == 0)))
    Q = ma.log(Q)
    # save the matrix back to .tab format
    fp = open(tab_fname + ".logscale.tab", "w")
    for i, row in enumerate(Q):
        fp.write(varlist[i] + '\t')
        fp.write('\t'.join(map(tostr, row)))
        fp.write('\n')
    fp.close()
def dewpoint(e):
    """
    Calculate the ambient dewpoint given the vapor pressure.

    e : scalar or array
        The water vapor partial pressure in mb.

    Returns : scalar or array
        The dew point temperature in degrees Celsius, with the shape of
        the result determined using numpy's broadcasting rules.
    """
    val = log(e / sat_pressure_0c)
    return 243.5 * val / (17.67 - val)
def costFunction_Regular(theta, *args):
    # Argument order: theta1, theta2, dataX_new, dataY_new
    '''
    theta1 = args[0]  # the initial thetas are passed in only to recover their shapes
    theta2 = args[1]
    dataX = args[2]   # 5000*401
    m = shape(dataX)[0]
    dataY = args[3]   # 5000*10
    K = args[4]       # number of classes
    lamda = args[5]
    m1, n1 = shape(theta1)
    m2, n2 = shape(theta2)
    theta1 = reshape(theta[0:m1 * n1], (m1, n1))
    theta2 = reshape(theta[m1 * n1:m1 * n1 + m2 * n2], (m2, n2))
    '''
    theta1, theta2, dataX, K, dataY, m = getArgs(theta, args)
    hx = getPredictRes(dataX, theta1, theta2)  # 5000*10
    jtheta = 0
    '''
    K is the number of classes and m the number of training samples.
    A for loop is used here instead of a single matrix product because a
    pure matrix product only keeps the diagonal entries; with large m and
    n that wastes both time and space. Since K is usually much smaller
    than m, the vector product handles the m samples while the outer loop
    runs K times, i.e. the loop count is O(K) rather than O(m).
    '''
    for i in range(K):
        # dataY[:, 0] is class 1
        Ak = -1 * dot(dataY[:, i].T, log(hx[:, i]))
        Bk = -1 * dot((1 - dataY[:, i]).T, log(1 - hx[:, i]))
        jtheta += Ak + Bk
    jtheta = jtheta * 1.0 / m
    # NOTE: lamda is only assigned in the commented-out block above; it
    # must be recovered from args (or from getArgs) for this call to work.
    reguTerm = getRegularTerm(theta1, theta2, lamda, m)
    jtheta_Regular = jtheta + reguTerm
    print("#################jtheta_Regular", jtheta_Regular)
    return jtheta_Regular
def _get_gamma_cdf(aseries, condition):
    """
    Returns the CDF values for aseries.

    Parameters
    ----------
    aseries : TimeSeries
        Annual series of data (one column per period)
    condition : TimeSeries
        Period mask.
    """
    # Mask the months for which no precipitation was recorded
    aseries_ = ma.masked_values(aseries, 0)
    # Get the proportion of zero precipitation for each period (MM/WW)
    pzero = 1. - aseries_.count(axis=0) / aseries.count(axis=0).astype(float)
    # Mask outside the reference period
    aseries_._mask |= condition._data
    meanrain = aseries_.mean(axis=0)
    aleph = ma.log(meanrain) - ma.log(aseries_).mean(axis=0)
    alpha = (1. + ma.sqrt(1. + 4. / 3 * aleph)) / (4. * aleph)
    beta = meanrain / alpha
    # Get the Gamma CDF (per month)
    gcdf = pzero + (1. - pzero) * ssd.gamma.cdf(aseries, alpha, scale=beta)
    return gcdf
def geoMean(array):
    '''
    Generate the geometric mean of a list or array, removing all
    zero-values but retaining total length.
    '''
    if isinstance(array, pandas.core.frame.DataFrame):
        array = array.to_numpy()  # as_matrix() was removed in pandas 1.0
    non_zero = ma.masked_values(array, 0)
    log_a = ma.log(non_zero)
    geom_mean = ma.exp(log_a.mean())
    return geom_mean