Пример #1
0
def classifyNB(inputVec, p0Vect, p1Vect, pAbsive):
    """Pick class 1 or class 0 for a word vector using naive Bayes scores.

    Each class score is the sum of ``inputVec`` multiplied element-wise by
    that class's word vector, plus the log of the class prior. The word
    vectors are presumably log-probabilities, since they are added to a log
    prior.

    :param inputVec: word vector of the document to classify
    :param p0Vect: per-word vector for class 0
    :param p1Vect: per-word vector for class 1
    :param pAbsive: prior probability of class 1
    :return: 1 when the class-1 score is strictly larger, otherwise 0
    """
    score0 = sum(inputVec * p0Vect) + log(1 - pAbsive)
    score1 = sum(inputVec * p1Vect) + log(pAbsive)
    return 1 if score1 > score0 else 0
Пример #2
0
def trainNB0(trainMatrix, trainCategory):
    """
    Train a two-class naive Bayes model.

    :param trainMatrix: training input vectors, one per document
    :param trainCategory: vector of class labels, one per document
    :return: (p0Vect, p1Vect, pAbusive) where pAbusive is the fraction of
        documents labelled 1 and p0Vect / p1Vect hold, per word, the log of
        the smoothed word frequency within class 0 / class 1
    """
    numTrainDocs = len(trainMatrix)
    vocabSize = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # Smoothing: start every word count at 1 and every denominator at 2 so
    # that no word ends up with probability zero.
    wordCounts = [ones(vocabSize), ones(vocabSize)]
    wordTotals = [2.0, 2.0]
    for idx, docVec in enumerate(trainMatrix):
        cls = 1 if trainCategory[idx] == 1 else 0
        wordCounts[cls] += docVec
        wordTotals[cls] += sum(docVec)
    # Logs are returned instead of raw probabilities so that later products
    # of many small numbers do not underflow to zero.
    p1Vect = log(wordCounts[1] / wordTotals[1])
    p0Vect = log(wordCounts[0] / wordTotals[0])
    return p0Vect, p1Vect, pAbusive
Пример #3
0
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """Return 1 or 0, whichever class has the larger naive Bayes score.

    A class score is the sum of ``vec2Classify`` times that class's word
    vector plus the log of the class prior (``pClass1`` for class 1,
    ``1.0 - pClass1`` for class 0). Ties go to class 0.
    """
    logScore1 = sum(vec2Classify * p1Vec) + log(pClass1)
    logScore0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    if logScore1 > logScore0:
        return 1
    return 0
Пример #4
0
def costFunction_Regular(theta, *args):
    print "现在在调用正则化的cost函数"
    # print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@",shape(theta)
    dataArr = asarray(args[0])  # args0 为特征数据  args1 为类别数据
    labelArr = asarray(args[1])
    m, n = shape(dataArr)  # dataArr 已经加入bias
    # print "记录条数:",m
    theta = reshape(theta, (n, 1))
    #print "costFunction_Regular中theta的值为:", theta
    hx = sigmodFunction(dataArr, theta)  #计算预测的类别概率值
    loghx = ma.log(hx)
    #print "log hx:",loghx
    yhx = dot(loghx.transpose(), labelArr)
    #print "yhx:",yhx
    log1_hx = ma.log(1 - hx)
    #print "log1_hx:",log1_hx
    #print (-yhx-dot(log1_hx.transpose(),(1-labelArr)))*1.0/m
    #print "lameda 值为:", args[2]  # args[2]为传入的lameda参数
    jtheta = (-yhx - dot(log1_hx.transpose(), (1 - labelArr))) * 1.0 / m + args[2] * 1.0 / (2 * m) * (
        dot(theta.transpose(), theta) - theta[0, 0] ** 2)
    #gra=getGradient(dataArr,labelArr,theta,m)
    #print type(jtheta),type(gra.flatten())
    #print gra
    #print "###################",type(array(jtheta)[0]),array(jtheta)[0]
    #print "costFunction_Regular得到的jtheta值为:", type(jtheta.flatten()[0]), jtheta, jtheta.flatten()[0]
    print "&&&&&7jtheta.flatten()[0]", jtheta.flatten()[0]
    return jtheta.flatten()[0]
Пример #5
0
def trainNB0(trainMatrix, trainClassMatrix):
    '''
    Train a two-class naive Bayes classifier.

    :param trainMatrix: training set, one word vector per sample
    :param trainClassMatrix: class label of each training sample
    :return: p0Vect: per-word log of the smoothed share that word has among
        all words of class-0 samples; p1Vect: the same for class 1;
        pAbusive: share of class-1 samples in the training set
    '''
    sampleCount = len(trainMatrix)
    featureCount = len(trainMatrix[0])
    pAbusive = sum(trainClassMatrix) / sampleCount
    # Smoothed starting values: every word count is 1 and each class total
    # is 2.0.
    p0Num, p0Denom = ones(featureCount), 2.0
    p1Num, p1Denom = ones(featureCount), 2.0
    for idx in range(sampleCount):
        sample = trainMatrix[idx]
        sampleTotal = sum(sample)
        if trainClassMatrix[idx] == 1:
            p1Num += sample
            p1Denom += sampleTotal
        else:
            p0Num += sample
            p0Denom += sampleTotal
    # Logs are taken to guard against underflow in later products.
    return log(p0Num / p0Denom), log(p1Num / p1Denom), pAbusive
Пример #6
0
    def cost(self, x, y) -> np.ndarray:
        ''' Cross-entropy cost of the network's prediction against ``y``.

        Arguments:
            x (numpy.ndarray): input data,
                Shape: (total input data, input layer's total_nodes + 1)

            y (numpy.ndarray): the correct output data,
                Shape: (total input data, output layer's total_nodes)

        Returns:
            The per-output costs (negated sums over axis 0 divided by
            ``len(y)``) added together.

        Precondition:
            Input layer and output layer.
        '''
        predicted = self.predict(x)

        # Masked logs: entries where the log is undefined are replaced by 0
        # instead of producing -inf / NaN.
        log_hit = ma.log(predicted).filled(0)
        log_miss = ma.log(1 - predicted).filled(0)

        # First term counts where y is 1, second term where y is 0.
        per_sample = np.multiply(y, log_hit) + np.multiply(1 - y, log_miss)

        per_output_cost = -np.sum(per_sample, axis=0) / len(y)
        return np.sum(per_output_cost)
Пример #7
0
def predictNBO(p0Vec, p1Vec, pAbusive, inputVec):
    """Predict class 1 or 0 for ``inputVec`` from naive Bayes log scores.

    The score of a class is the sum of ``inputVec`` times the class word
    vector plus the log of its prior; class 1 wins only when its score is
    strictly greater.
    """
    abusiveScore = sum(inputVec * p1Vec) + log(pAbusive)
    normalScore = sum(inputVec * p0Vec) + log(1.0 - pAbusive)
    return 1 if abusiveScore > normalScore else 0
    def find_an_approximation(self, function_table: dict) -> Function:
        """Fit ``a * ln(x) + b`` to the points of ``function_table``.

        ``function_table`` maps x to y. On success the fitted ``Function``
        is printed via ``print_approximation_table`` and returned. ``None``
        is returned when ``log`` raises ``ValueError`` for a key, when the
        solver's first value is ``None``, or when a ``TypeError`` occurs
        while solving or building the result.

        NOTE(review): the system is passed as [[n, S_ln], [S_ln, S_ln2]]
        with right-hand side [S_y, S_y_ln]; whether the first returned value
        really is the slope depends on the return order of
        ``solve_matrix22`` - confirm.
        """
        try:
            logs = [log(x) for x in function_table]
            SLNX = sum(logs)
            SLNXX = sum(lnx * lnx for lnx in logs)
            SY = sum(function_table.values())
            SYLNX = sum(lnx * y for lnx, y in zip(logs, function_table.values()))
            n = len(function_table)
        except ValueError:
            return None

        try:
            a, b = self.solve_matrix22([[n, SLNX], [SLNX, SLNXX]], [SY, SYLNX])
            if a is None:
                return None
            fun = lambda x: a * log(x) + b
            # Sum of squared residuals over every tabulated point.
            s = sum((fun(x) - y)**2 for x, y in function_table.items())
            root_mean_square_deviation = sqrt(s / n)
            description = f'ф = {round(a, 3)}*ln(x) {round(b, 3):+}'
            f = Function(fun, description, s, root_mean_square_deviation)
            self.print_approximation_table(function_table, f,
                                           self.function_type)
            return f
        except TypeError:
            return None
Пример #9
0
def average_in_flux(mag, dmag, axis=None):
    """Average magnitudes in flux space, weighting by inverse flux variance.

    Magnitudes are converted to fluxes with ``10**(mag / -2.5)`` and their
    errors propagated to flux errors; the fluxes are then averaged with
    weights ``dflux**-2`` along ``axis`` and converted back.

    Returns ``(avg_mag, avg_dmag)``.
    """
    flux = 10**(mag / -2.5)
    dflux = np.log(10) / 2.5 * flux * dmag
    weights = np.power(dflux, -2)
    avg_dflux = np.power(np.sum(weights, axis), -0.5)
    avg_flux = np.sum(flux * weights, axis) * avg_dflux**2
    avg_mag = -2.5 * np.log10(avg_flux)
    avg_dmag = 2.5 / np.log(10) * np.divide(avg_dflux, avg_flux)
    return avg_mag, avg_dmag
Пример #10
0
def entropy(array, dim=None):
    """Entropy in bits of unnormalised weights, reduced along ``dim``.

    Evaluates ``(log(n) - sum(a * log(a)) / n) / log(2)`` with ``n`` the sum
    of the weights. With ``dim=None`` the array is flattened first. Masked
    logs are used, so entries whose log is undefined are left out of the
    sum.
    """
    if dim is None:
        array = array.ravel()
        dim = 0
    total = ma.sum(array, dim)
    weighted_log_sum = ma.sum(ma.log(array) * array, dim)
    return (ma.log(total) - weighted_log_sum / total) / ma.log(2.0)
Пример #11
0
def entropy(array, dim=None):
    """Return the base-2 entropy of non-normalised counts along ``dim``.

    ``dim=None`` flattens the input and reduces over everything. The result
    is ``log2(n) - sum(a * ln a) / (n * ln 2)`` where ``n`` is the total;
    masked logs keep undefined entries out of the sum.
    """
    if dim is None:
        array, dim = array.ravel(), 0
    n = ma.sum(array, dim)
    a_log_a = ma.log(array) * array
    summed = ma.sum(a_log_a, dim)
    nats = ma.log(n) - summed / n
    return nats / ma.log(2.0)
Пример #12
0
def average_in_flux(mag, dmag, axis=None):
    """Combine magnitudes by an inverse-variance weighted mean of fluxes.

    ``mag`` and ``dmag`` are magnitudes and their errors; ``axis`` is passed
    to the ``np.sum`` reductions. The function returns the averaged
    magnitude and its error as a tuple.
    """
    flux = 10**(mag / -2.5)
    # Error propagation from magnitude to flux.
    dflux = np.log(10) / 2.5 * flux * dmag
    inverse_variance = np.power(dflux, -2)
    avg_dflux = np.power(np.sum(inverse_variance, axis), -0.5)
    avg_flux = np.sum(flux * inverse_variance, axis) * avg_dflux**2
    return (-2.5 * np.log10(avg_flux),
            2.5 / np.log(10) * np.divide(avg_dflux, avg_flux))
Пример #13
0
def computeCost(theta, X, y, lamda):
    """Regularized logistic-regression cost.

    The data term is ``(log(h).dot(-y) - log(1 - h).dot(1 - y)) / m`` with
    ``h = sigmoid(X.dot(theta))`` and ``m`` the number of rows of ``X``; the
    penalty is ``theta . theta * lamda / (2 * m)`` and includes every
    component of ``theta``.
    """
    sampleCount = np.shape(X)[0]
    hypo = sigmoid(X.dot(theta))
    positivePart = log(hypo).dot(-y)
    negativePart = log(1.0 - hypo).dot(1 - y)
    dataCost = (positivePart - negativePart) / sampleCount
    penalty = theta.transpose().dot(theta) * lamda / (2 * sampleCount)
    return dataCost + penalty
Пример #14
0
def log_linear_vinterp(T,P,levs):
    '''
    Interpolate a field from sigma levels to pressure levels, linearly in
    log(pressure).

    Author: Charles Doutriaux, version 1.1.
    The original note says a 2D field is expected here so that no reorder is
    needed (a reorder was suspected of leaking memory).

    Input
      T :    temperature on sigma levels
      P :    pressure field from TOP (level 0) to BOTTOM (last level)
      levs : pressure levels to interpolate to (same units as P); a single
             level without a length is also accepted

    Output
      t :    temperature on pressure levels (levs); points for which no pair
             of adjacent sigma levels brackets the target level are masked

    NOTE(review): `Numeric` and `asMA` are used below but are not defined in
    this block - presumably module-level names; verify.
    '''
    import numpy.ma as MA
##     from numpy.oldnumeric.ma import ones,Float,greater,less,logical_and,where,equal,log,asarray,Float16
    sh=P.shape
    nsigma=sh[0] # Number of sigma levels
    try:
        nlev=len(levs)  # Number of pressure levels
    except:
        nlev=1  # len(levs) fails when a single level is passed
    t=[]
    for ilv in range(nlev): # loop through pressure levels
        try:
            lev=levs[ilv] # get value for the level
        except:
            lev=levs  # only 1 level passed
#       print '          ......... level:',lev
        # All four work arrays start at -1; Pbel == -1 later marks points
        # where no bracketing pair of sigma levels was found.
        Pabv=MA.ones(P[0].shape,Numeric.Float)
        Tabv=-Pabv # Temperature on sigma level Above
        Tbel=-Pabv # Temperature on sigma level Below
        Pbel=-Pabv # Pressure on sigma level Below
        Pabv=-Pabv # Pressure on sigma level Above
        for isg in range(1,nsigma): # loop from second sigma level to last one
##             print 'Sigma level #',isg
            a = MA.greater(P[isg],  lev) # Where is the pressure greater than lev
            b = MA.less(P[isg-1],lev)    # Where is the pressure less than lev

            # Where lev lies between the two sigma levels, record the
            # bracketing values: "Above" takes level isg (pressure greater
            # than lev), "Below" takes level isg-1 (pressure less than lev).
            Pabv=MA.where(MA.logical_and(a,b),P[isg],Pabv) # Pressure on sigma level Above
            Tabv=MA.where(MA.logical_and(a,b),T[isg],Tabv) # Temperature on sigma level Above
            Pbel=MA.where(MA.logical_and(a,b),P[isg-1],Pbel) # Pressure on sigma level Below
            Tbel=MA.where(MA.logical_and(a,b),T[isg-1],Tbel) # Temperature on sigma level Below
        # end of for isg in range(1,nsigma)
#       val=where(equal(Pbel,-1.),Pbel.missing_value,lev) # set to missing value if no data below lev if there is

        # Log-linear interpolation between the bracketing levels; masked
        # wherever Pbel still holds its -1 start value.
        tl=MA.masked_where(MA.equal(Pbel,-1.),MA.log(lev/MA.absolute(Pbel))/MA.log(Pabv/Pbel)*(Tabv-Tbel)+Tbel) # Interpolation
        t.append(tl) # add a level to the output
    # end of for ilv in range(nlev)
    return asMA(t).astype(Numeric.Float32) # convert t to an array
Пример #15
0
def KL_Measure(i, j):
    '''
    Score two distributions by their symmetrised KL divergence.

    The two directed sums ``sum(i * log(i / j))`` and ``sum(j * log(j / i))``
    are averaged into D, and ``1 / (1 + e ** D)`` is returned. The ``.data``
    attribute of each log result is used, so ``log`` is presumably a
    masked-array log whose mask is discarded here - TODO confirm.
    :return: the score described above
    '''
    forward = sum(i*(log(i/j).data))
    backward = sum(j*(log(j/i).data))
    symmetric = (forward + backward)/2
    return 1/(1+ math.e ** symmetric )
Пример #16
0
def KL_Measure(i, j):
    '''
    Turn the averaged two-way KL divergence of ``i`` and ``j`` into a score.

    D is the mean of ``sum(i * log(i / j))`` and ``sum(j * log(j / i))``;
    the returned value is ``1 / (1 + e ** D)``. Only the ``.data`` of each
    log result is used (assumes a masked-array ``log`` - TODO confirm).
    :return: the score described above
    '''
    log_ratio_ij = log(i / j).data
    KL1 = sum(i * log_ratio_ij)
    log_ratio_ji = log(j / i).data
    KL2 = sum(j * log_ratio_ji)
    D = (KL1 + KL2) / 2
    return 1 / (1 + math.e**D)
Пример #17
0
def muti_bin2(css, Nc, bs, nc):
    """Row-wise sum of ``count * log(N * count / nc / nb) / N`` over two groups.

    For every row of ``css`` the labels in the columns where ``bs == 1`` are
    counted with ``np.bincount(minlength=Nc)``; the counts for the other
    group are ``nc`` minus those. Masked logs are used and each row sum is
    filled with 0 where it is masked.
    """
    def count_labels(row):
        return np.bincount(row, minlength=Nc)

    ncs_1 = ma.array(np.apply_along_axis(count_labels, 1, css[:, bs == 1]))
    ncs_0 = nc - ncs_1
    N = len(bs)
    nb1 = bs.sum()
    nb0 = N - nb1
    part_1 = ma.sum(ncs_1 * ma.log(1.*N*ncs_1/nc/nb1),1).filled(0)
    part_0 = ma.sum(ncs_0 * ma.log(1.*N*ncs_0/nc/nb0),1).filled(0)
    return (1./N * (part_1 + part_0))
Пример #18
0
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """Classify a word vector as class 1 or class 0 and print both scores.

    The scores are log-space quantities: the log of a product is the sum of
    the logs, so ``p(w|c) * p(c)`` with independent words becomes the sum of
    the selected per-word values plus the log prior.

    :param vec2Classify: word vector to classify
    :param p0Vec: per-word vector for class 0
    :param p1Vec: per-word vector for class 1
    :param pClass1: prior probability of class 1
    :return: 1 if the class-1 score is strictly larger, otherwise 0
    """
    p1 = sum(vec2Classify*p1Vec) + log(pClass1)  # log(a*b) = log(a) + log(b)
    p0 = sum(vec2Classify*p0Vec) + log(1 - pClass1)
    # The scores are real-valued, so format them with %f; %d would truncate
    # them to integers and make the printed values misleading.
    print("p0:%f, p1:%f" % (p0, p1))
    if p1 > p0:
        return 1
    else:
        return 0
Пример #19
0
    def _compute_shannon_evenness_index(self, lct, footprint, lct_mask):
        """Compute Shannon's evenness index.

        Accumulates ``p * log(p)`` over every cover type (``p`` being the
        value returned by ``_compute_pct_cover_type_within_footprint``),
        negates the sum and divides it by the log of the cover-type count
        within the window. Masked results are filled with 0 and the output
        is cast to float32.
        """
        cover_types = self._get_cover_types(lct)
        weighted_log_sum = ma.masked_array(zeros(shape=lct.shape, dtype=float32),
                                           mask=lct_mask)
        for cover_type in cover_types:
            proportion = self._compute_pct_cover_type_within_footprint(lct, cover_type, footprint, lct_mask)
            # Masked log filled with 0 where the log is undefined.
            log_proportion = ma.filled(ma.log(proportion), 0)
            weighted_log_sum += proportion*log_proportion

        type_count = self._count_covertypes_within_window(lct, cover_types, footprint)

        evenness = -weighted_log_sum / ma.log(type_count)
        return ma.filled(evenness, 0).astype(float32)
Пример #20
0
    def _compute_shannon_evenness_index(self, lct, footprint, lct_mask):
        """Compute Shannon's evenness index.

        The numerator is the sum over cover types of ``p * log(p)`` (masked
        log, filled with 0 where undefined); the result is its negation
        divided by the masked log of the cover-type count in the window,
        filled with 0 and returned as float32.
        """
        cover_types_list = self._get_cover_types(lct)
        total = ma.masked_array(zeros(shape=lct.shape, dtype=float32),
                                mask=lct_mask)
        for current_type in cover_types_list:
            share = self._compute_pct_cover_type_within_footprint(
                lct, current_type, footprint, lct_mask)
            total += share * ma.filled(ma.log(share), 0)

        covertype_count = self._count_covertypes_within_window(
            lct, cover_types_list, footprint)
        log_count = ma.log(covertype_count)

        return ma.filled(-total / log_count, 0).astype(float32)
Пример #21
0
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.

    A zero lapse rate uses the logarithmic (isothermal) expression; any
    other value uses the power-law expression.
    """
    import numpy as N
    #jfp was import Numeric as N
    import numpy.ma as MA
    #jfp was import MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # Flatten before indexing: a plain scalar becomes a 0-d array, and
        # indexing a 0-d array with [0] raises IndexError.
        if MA.ravel(MA.array(lapse_rate))[0] == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # Evaluate both expressions everywhere, then overwrite the elements
        # whose lapse rate is zero with the logarithmic result.
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))
        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
Пример #22
0
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.

    Elements with a zero lapse rate get the logarithmic (isothermal)
    expression; all others get the power-law expression.
    """
    import numpy as N
    #jfp was import Numeric as N
    import numpy.ma as MA
    #jfp was import MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # A scalar lapse rate turns into a 0-d array, which cannot be
        # indexed with [0]; ravel it first so scalars are accepted.
        if MA.ravel(MA.array(lapse_rate))[0] == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # Compute both forms for every element and then patch in the
        # logarithmic values at the positions where lapse_rate is zero.
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P / P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))
        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
Пример #23
0
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """
    Naive Bayes classification function.

    :param vec2Classify: word vector to classify
    :param p0Vec: per-word vector for class 0
    :param p1Vec: per-word vector for class 1
    :param pClass1: prior probability of class 1
    :return: 1 if the class-1 score is strictly greater than the class-0
        score, otherwise 0
    """
    classOneScore = sum(vec2Classify * p1Vec) + log(pClass1)
    classZeroScore = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)
    return 1 if classOneScore > classZeroScore else 0
Пример #24
0
def classifyNB(vecToClassify, p0Vec, p1Vec, pClass1):
    '''
    Classify a word vector with a trained naive Bayes model.

    :param vecToClassify: word vector to classify
    :param p0Vec: per-word vector for class 0
    :param p1Vec: per-word vector for class 1
    :param pClass1: prior probability of class 1
    :return: 1 if the class-1 score is strictly larger, otherwise 0
    '''
    # Each class score must be built from that class's own word vector and
    # prior: p1Vec goes with pClass1, p0Vec with 1 - pClass1.
    p1 = sum(vecToClassify * p1Vec) + log(pClass1)
    p0 = sum(vecToClassify * p0Vec) + log(1 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0
Пример #25
0
def computeNMI(CG, n_G, n_C):
    """Normalized mutual information between cluster and class assignments.

    :param CG: sequence of (cluster, class) integer index pairs; the first
        entry selects the row and the second the column of the joint table
    :param n_G: number of rows (clusters) of the joint table
    :param n_C: number of columns (classes) of the joint table
    :return: I(G; C) / (H(C) + H(G)); cells, clusters and classes with zero
        probability contribute nothing to any of the three terms
    """
    # Joint distribution p(g, c) from the pair counts.
    pcg = zeros([n_G,n_C])
    for line in CG:
        pcg[line[0], line[1]] += 1 # column 1 is cluster and thus row
    pcg = pcg/pcg.sum()

    # Marginals: pc over columns (classes), pg over rows (clusters).
    pc = pcg.sum(axis=0)
    pg = pcg.sum(axis=1)

    # pcpg[i, j] = 1 / (p(c=j) * p(g=i))
    pcpg = zeros([n_G, n_C])
    for i in range(n_G):
        for j in range(n_C):
            pcpg[i,j] = 1/(pc[j]*pg[i])

    # Masked log: empty cells are masked and then count as 0.
    forLog = ma.log(multiply(pcg, pcpg)).filled(0)
    numerator = multiply(pcg, forLog).sum()

    # Each entropy is reduced to a scalar before the two are added, so both
    # carry the same weight in the denominator. The masked log keeps
    # zero-probability marginals from producing NaN.
    entropyC = -multiply(pc, ma.log(pc).filled(0)).sum()
    entropyG = -multiply(pg, ma.log(pg).filled(0)).sum()
    NMI = numerator/(entropyC + entropyG)
    return NMI
def fit_hurdle_gamma_vector(data):
    """Fit, per row, a zero probability plus gamma shape/scale to the nonzeros.

    For each row of the 2-D ``data`` the fraction of exact zeros is
    computed; the zeros are then masked, a small random amount is added to
    the remaining values, and a gamma shape is found iteratively from the
    log of the mean and the mean of the logs. The scale is the mean divided
    by the shape.

    The input array is not modified: the noise is added to a private copy.

    Returns a tuple ``(shape, scale, prob_zero, insufficient_data)`` where
    ``insufficient_data`` flags rows whose nonzero count is at most
    ``SUFFICIENT_DATA_POINTS``. A warning is issued when any shape or scale
    is NaN.
    """
    nonzero = data != 0.0
    num_nonzero = np.sum(nonzero, axis = 1, keepdims = True)
    num_zero = data.shape[1] - num_nonzero
    insufficient_data = (num_nonzero <= SUFFICIENT_DATA_POINTS).flatten()
    prob_zero = num_zero / data.shape[1]
    # copy = True: ma.array does not copy by default, so the in-place noise
    # addition below would otherwise write into the caller's array.
    data = ma.array(data = data, mask = ~nonzero, copy = True)
    # Add a small amount of noise to avoid 0s in the data, or all values
    # being identical, causing issues.
    data += np.random.uniform(1e-8, 0.2, size = data.shape)
    data_mean = np.mean(data, axis = 1)
    log_of_mean = np.log(data_mean)
    mean_of_logs = np.mean(ma.log(data), axis = 1)
    log_diff = mean_of_logs - log_of_mean
    # Starting guess for the shape, then iterate on its reciprocal until the
    # largest change in shape drops below the tolerance.
    shape = 0.5 / (log_of_mean - mean_of_logs)
    shape_reciprocal = 1  / shape
    difference = 1
    while difference > 0.000005:
        numerator = log_diff + np.log(shape) - digamma(shape)
        denominator = (shape ** 2) * (shape_reciprocal - polygamma(1, shape))
        tmp_shape_reciprocal = shape_reciprocal + numerator / denominator
        tmp_shape = 1 / tmp_shape_reciprocal
        difference = np.max(np.abs(tmp_shape - shape))
        shape = tmp_shape
        shape_reciprocal = tmp_shape_reciprocal
    scale = data_mean / shape
    if np.any(np.isnan(shape)) or np.any(np.isnan(scale)):
        warn("NaN shape or scale value")
    return (shape.data, scale.data, prob_zero.flatten(), insufficient_data)
Пример #27
0
    def gamma(self, mkt_dict_, engine_, unit_=None):
        """calculate option GAMMA with market data and engine

        ``unit_`` overrides ``self.unit`` when it is truthy. With the BS
        method the closed-form gamma is returned; with the MC method gamma
        is estimated from a second difference of the payoff on simulated
        terminal prices, averaged and discounted. Any other method falls
        through and returns ``None``.
        """
        _rate, _spot, _vol, _div, _method, _param, _sign, _strike, _t = self._prepare_risk_data(
            mkt_dict_, engine_)
        _unit = unit_ or self.unit

        if _method == EngineMethod.BS.value:
            # d1 uses the dividend-adjusted drift (rate - div), in line with
            # the exp(-div * t) factor applied to the result below.
            _d1 = (log(_spot / _strike) +
                   (_rate - _div + _vol**2 / 2) * _t) / _vol / sqrt(_t)
            return exp(-_d1**2 / 2) / sqrt(
                2 * pi) / _spot / _vol / sqrt(_t) * exp(-_div * _t) * _unit

        elif _method == EngineMethod.MC.value:
            from utils.monte_carlo import MonteCarlo
            _iteration = self._check_iter(
                _param[EngineParam.MCIteration.value])
            _spot = MonteCarlo.stock_price(_iteration,
                                           isp=_spot,
                                           rate=_rate,
                                           div=_div,
                                           vol=_vol,
                                           t=_t)
            _step = 0.01
            # Second difference of the payoff with spacing 2 * _step around
            # each simulated price.
            _gamma = [
                ((max(_sign * (_s + 2 * _step - _strike), 0) -
                  max(_sign * (_s - _strike), 0)) -
                 (max(_sign * (_s - _strike), 0) -
                  max(_sign * (_s - 2 * _step - _strike), 0))) / (4 * _step**2)
                for _s in _spot
            ]
            return average(_gamma) * exp(-_rate * _t) * _unit
Пример #28
0
    def pv(self, mkt_dict_, engine_, unit_=None):
        """calculate option PV with market data and engine

        ``unit_`` overrides ``self.unit`` when it is truthy. The BS method
        returns the closed-form value; the MC method returns the discounted
        average payoff over simulated terminal prices. Any other method
        falls through and returns ``None``.
        """
        (_rate, _spot, _vol, _div, _method, _param, _sign, _strike,
         _t) = self._prepare_risk_data(mkt_dict_, engine_)
        _unit = unit_ or self.unit

        if _method == EngineMethod.BS.value:
            _drift = (_rate - _div + _vol**2 / 2) * _t
            _d1 = (log(_spot / _strike) + _drift) / _vol / sqrt(_t)
            _d2 = _d1 - _vol * sqrt(_t)
            _spot_leg = _spot * exp(-_div * _t) * norm.cdf(_sign * _d1)
            _strike_leg = _strike * exp(-_rate * _t) * norm.cdf(_sign * _d2)
            return _sign * (_spot_leg - _strike_leg) * _unit

        if _method == EngineMethod.MC.value:
            from utils.monte_carlo import MonteCarlo
            _iteration = self._check_iter(
                _param[EngineParam.MCIteration.value])
            _paths = MonteCarlo.stock_price(_iteration,
                                            isp=_spot,
                                            rate=_rate,
                                            div=_div,
                                            vol=_vol,
                                            t=_t)
            # Payoff of every simulated terminal price, floored at zero.
            _payoffs = [max(_sign * (_s - _strike), 0) for _s in _paths]
            return average(_payoffs) * exp(-_rate * _t) * _unit
Пример #29
0
def F_ideal_int(g_s, density, multiplicities):
    """Sum of ``m * (rho * log(rho / density) - (rho - density))`` over all sites.

    ``rho`` is ``density * g`` for every entry of ``g_s`` and ``m`` the
    matching entry of ``multiplicities``. Ratios equal to 0 are masked
    before the log and their log is taken as 0.
    """
    rho_s = [density*g_i for g_i in g_s]
    log_s = [ma.log(ma.masked_values(rho_i/density, 0)) for rho_i in rho_s]
    terms = []
    for m, rho_i, log_i in zip(multiplicities, rho_s, log_s):
        ideal_part = rho_i * ma.filled(log_i, 0)
        terms.append(m*(ideal_part - (rho_i - density)))
    integral = sum(terms)
    return np.sum(integral)
Пример #30
0
    def preprocess(self, X, method):
        """
        Preprocess the data with the named method.

        "bucket": quantile-bins each feature into 10 ordinal bins and
        divides the bin index by 10.
        "clip": keeps values up to 1000 and adds the square root of any
        excess above 1000.
        "log": takes the masked natural log and replaces invalid results
        with 0.
        Any other name raises an Exception.
        """
        if method == "bucket":  # scales into 0-1 range with bins
            print("using the bucket prep method")
            from sklearn.preprocessing import KBinsDiscretizer
            discretizer = KBinsDiscretizer(n_bins=10,
                                           encode="ordinal",
                                           strategy="quantile")
            discretizer.fit(X)
            binned = discretizer.transform(X)
            binned /= 10  # transform from nominal values to 0-1
            return binned
        if method == "clip":  # clips the raw counts into a certain range
            print("using the clip prep method")
            cutoff = 1000
            capped = np.minimum(X, cutoff)
            excess_root = np.sqrt(np.maximum(X - cutoff, 0))
            return capped + excess_root
        if method == "log":  # takes the log of the count
            print("using the log prep method")
            import numpy.ma as ma
            logged = ma.log(X)
            # fix_invalid replaces non-finite results with 0
            return ma.fix_invalid(logged, fill_value=0).data
        raise Exception("Incorrect preprocess method name passed!")
Пример #31
0
def F_derviative_slow(rho_s,
                      rho0_s,
                      u_s,
                      zr_s,
                      r,
                      x,
                      y,
                      z,
                      kBT,
                      voxel,
                      shape=None):
    """Derivative of the 3D-RISM functional using ``slow_convolution``.

    For every site the result is
    ``kBT * (log(rho_i / rho0_i) + u_i) + kBT * sum_j conv(rho_j - rho0_j)``
    where the convolution pairs each density difference with a column of
    ``zr_s``. ``rho_s`` is reshaped to ``shape`` when one is given, and a 1-D
    ``zr_s`` is treated as a single column.

    return array of derivative with the shape n_s * grid
    """
    if np.any(shape):
        rho_s = np.reshape(rho_s, shape)
    if len(zr_s.shape) == 1:
        zr_s = zr_s.reshape((-1, 1))
    log_s = [ma.log(ma.masked_values(rho_i/rho0_i, 0, atol=1.0e-15))
             for rho_i, rho0_i in zip(rho_s, rho0_s)]
    deriv = []
    for u_i, _rho_i, log_i in zip(u_s, rho_s, log_s):
        # u_i is multiplied by kBT as well, as it is actually u_i*beta.
        # Masked (zero-density) logs are replaced by ln(1e-15).
        mean_f_part = kBT * (ma.filled(log_i, -34.53877639491068) + u_i)
        ex_part = 0
        for rho_j, rho0_j, zr in zip(rho_s, rho0_s, zr_s.T):
            ex_part += slow_convolution(rho_j - rho0_j, zr, r, x, y, z, voxel)
        deriv.append(mean_f_part + ex_part * kBT)
    return np.array(deriv)
Пример #32
0
def F_derviative(rho_s, rho0_s, u_s, z_3ds, kBT, voxel, args, shape=None):
    """Functional derivative per site, with the convolutions done by FFT.

    For site i the value is ``kBT * (log(rho_i / rho0_i) + u_i)`` minus the
    sum over j of ``kBT * conv_ij / args.multiplicities[i]``, where
    ``conv_ij`` is the inverse FFT of the shifted spectrum of
    ``rho_j - rho0_j`` times ``z_3ds[i, j]``. Values above 500 are clipped
    to 500, and the median and average of every site are printed.
    ``rho_s`` is reshaped to ``shape`` when one is given.
    """
    if np.any(shape):
        rho_s = np.reshape(rho_s, shape)
    log_s = [ma.log(ma.masked_values(rho_i/rho0_i, 0, atol=1.0e-15))
             for rho_i, rho0_i in zip(rho_s, rho0_s)]
    deriv = []
    for i, (log_i, u_i, _rho_i, _z_3d_i) in enumerate(
            zip(log_s, u_s, rho_s, z_3ds)):
        # Masked (zero-density) logs are replaced by ln(1e-15).
        mean_f_part = kBT * (ma.filled(log_i, -34.53877639491068) + u_i)
        ex_part = 0
        for rho_j, rho0_j, z_3d_ij in zip(rho_s, rho0_s, z_3ds[i, :]):
            # shift 0 freq to the middle before multiplying
            spectrum = np.fft.fftshift(np.fft.fftn(rho_j - rho0_j))
            convol = np.real(
                np.fft.ifftn(np.fft.ifftshift(spectrum * z_3d_ij)))
            ex_part += kBT * convol / args.multiplicities[i]
        deriv_i = mean_f_part - ex_part
        # fix discontinuity: cap large values at 500
        deriv_i = np.where(deriv_i > 500, 500, deriv_i)
        deriv.append(deriv_i)
        print('median', np.median(deriv_i))
        print('avg', np.average(deriv_i))
    return np.array(deriv)
Пример #33
0
def F_ideal_int(rho_s, rho0_s):
    """Sum of ``rho * log(rho / rho0) - (rho - rho0)`` over all sites and points.

    Logs that are undefined (zero density) count as 0.

    NOTE(review): the masked ratios are wrapped in ``np.array``, which
    presumably drops the masks set by ``masked_values``; the masked log
    still handles exact zeros - confirm this is intended.
    """
    ratios = np.array([ma.masked_values(rho_i/rho0_i,0,atol=1.0e-15)
                       for rho_i, rho0_i in zip(rho_s, rho0_s)])
    log_s = [ma.log(ratio) for ratio in ratios]
    terms = []
    for rho_i, rho0_i, log_i in zip(rho_s, rho0_s, log_s):
        terms.append(rho_i * ma.filled(log_i, 0) - (rho_i - rho0_i))
    return np.sum(sum(terms))
Пример #34
0
    def __call__(self, value, clip=None):
        """Map ``value`` onto a logarithmic scale between vmin and vmax.

        ``clip`` defaults to ``self.clip``. ``self.autoscale_None`` is called
        with the converted data before ``self.vmin`` / ``self.vmax`` are
        read. When clipping, masked entries are filled with vmax, the data is
        clipped to [vmin, vmax] and the original mask is re-applied.

        Returns ``(log(val) - log(vmin)) / (log(vmax) - log(vmin))``, or
        zeros when ``vmin == vmax``. Iterable input gives an array; any
        other input gives the single element.

        Raises ``ValueError`` when ``vmin > vmax`` or ``vmin <= 0``.
        """
        if clip is None:
            clip = self.clip

        # Builtin ``float`` is used as the dtype: the ``np.float`` alias has
        # been removed from NumPy and meant the same type.
        if cbook.iterable(value):
            vtype = 'array'
            val = ma.asarray(value).astype(float)
        else:
            vtype = 'scalar'
            val = ma.array([value]).astype(float)

        self.autoscale_None(val)
        vmin, vmax = self.vmin, self.vmax
        if vmin > vmax:
            raise ValueError("minvalue must be less than or equal to maxvalue")
        elif vmin <= 0:
            raise ValueError("values must all be positive")
        elif vmin == vmax:
            return 0.0 * val
        else:
            if clip:
                mask = ma.getmask(val)
                val = ma.array(np.clip(val.filled(vmax), vmin, vmax),
                               mask=mask)
            result = (ma.log(val) - np.log(vmin)) / (np.log(vmax) -
                                                     np.log(vmin))
        if vtype == 'scalar':
            result = result[0]
        return result
Пример #35
0
def dewpoint(e):
    r'''Calculate the ambient dewpoint given the vapor pressure.

    Parameters
    ----------
    e : array_like
        Water vapor partial pressure in mb

    Returns
    -------
    array_like
        Dew point temperature in degrees Celsius.

    See Also
    --------
    dewpoint_rh, saturation_vapor_pressure, vapor_pressure

    Notes
    -----
    This function inverts the Bolton 1980 [3] formula for saturation vapor
    pressure to obtain the temperature instead. With
    :math:`L = log(e / e_0)`, where :math:`e_0` is ``sat_pressure_0c``
    (presumably 6.112 mb), the dewpoint in degrees Celsius is

    .. math:: T = \frac{243.5 L}{17.67 - L}

    References
    ----------
    .. [3] Bolton, D., 1980: The Computation of Equivalent Potential
           Temperature. Mon. Wea. Rev., 108, 1046-1053.
    '''

    log_ratio = log(e / sat_pressure_0c)
    numerator = 243.5 * log_ratio
    return numerator / (17.67 - log_ratio)
Пример #36
0
def zonal_avg(data,Log=False):
    """
    Compute the zonal average of field on POP gx3v5 grid.
    Shape of input data is expected to be either [nfoo,nlat,nlon]
    or [nlat,nlon]. Log=True computes the geometric average.

    The field is first remapped point by point onto a regular lat/lon grid
    read from two NetCDF files at hard-coded absolute paths, then averaged
    over the last axis with the regular grid's cell areas as weights.

    Exits the process via sys.exit() if data is neither 2-D nor 3-D.

    NOTE(review): for 2-D input new_data is allocated as (nlat, nlon), but
    the remap loop assigns new_data[:,i,j] with three indices - this looks
    like it only works for 3-D input; confirm.

    Output: arrays zavg and lat
    """
    print 'computing zonal average'
    # get lat and lon for new regular grid
#   fpin        = Nio.open_file('/home/ivan/Python/data/lat_t.nc','r')
    fpin        = Nio.open_file('/home/emunoz/Python/mapping/model_grid/lat_t.nc','r')
    lat_t       = fpin.variables['lat_t'][:]
    lat_t_edges = fpin.variables['lat_t_edges'][:]
    fpin.close()
#   fpin        = Nio.open_file('/home/ivan/Python/data/gx3v5.nc','r')
    fpin        = Nio.open_file('/home/emunoz/Python/mapping/model_grid/gx3v5.nc','r')
    # regular longitudes are the sorted first row of the model longitudes
    lon_t       = N.sort(fpin.variables['TLONG'][0,:])
    ulon        = N.sort(fpin.variables['ULONG'][0,:])
    # close the longitude edges by appending the first edge shifted by 360
    lon_t_edges = N.concatenate((ulon,ulon[0,N.newaxis]+360.),0)
    # get gx3v5 lat and lon
    tlon        = fpin.variables['TLONG'][:]
    tlat        = fpin.variables['TLAT'][:]
    fpin.close()

    # compute area of cells in new regular grid
    area = grid_area(lon_t_edges,lat_t_edges)

    nlat = lat_t.shape[0]
    nlon = lon_t.shape[0]

    # allocate the remapped field with the same leading dimension as data
    if data.ndim == 3:
        new_data = MA.zeros((data.shape[0],nlat,nlon),dtype=float)
    elif data.ndim == 2:
        new_data = MA.zeros((nlat,nlon),dtype=float)
    else:
        print 'Check field dimensions'
        sys.exit()

    # geometric mean? average the logs and exponentiate afterwards
    if Log:
        work = MA.log(data)
    else:
        work = data

    # remap data to new regular grid
    for i in range(nlat):
        #print 'lat = %.2f'%(lat_t[i])
        for j in range(nlon):
            new_data[:,i,j] = extract_loc(lon_t[j],lat_t[i],tlon,tlat,work)

    # compute zonal average (area-weighted over the last axis)
    if Log:
        za_data = (MA.exp(MA.average(new_data,axis=-1,
            weights=N.resize(area,new_data.shape))))
    else:
        za_data = (MA.average(new_data,axis=-1,
            weights=N.resize(area,new_data.shape)))

    return za_data, lat_t
Пример #37
0
def geometric_mean(array, axis=0):
    '''return the geometric mean of an array along ``axis``, ignoring zeros

    Zero values are masked before the log is taken, so they are left out of
    the mean of the logs; the result is the exponential of that mean.
    '''
    masked = ma.masked_values(array, 0)
    mean_log = ma.log(masked).mean(axis=axis)
    return ma.exp(mean_log)
Пример #38
0
Файл: Counts.py Проект: SCV/cgat
def geometric_mean(array, axis=0):
    '''Compute the geometric mean over ``axis`` with zero values masked out.

    Zeros are masked (not dropped), so the array shape is unchanged while
    the masked entries are excluded from the mean of the logarithms.
    '''
    logs = ma.log(ma.masked_values(array, 0))
    averaged = logs.mean(axis=axis)
    return ma.exp(averaged)
Пример #39
0
 def test_testUfuncs1(self):
     # Check that the masked-array functions give the same values as the
     # plain numpy ones (sin, cos, comparisons, concatenate, ...).
     (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d

     # Unary functions evaluated on x / xm.
     for plain, masked_fn in ((np.cos, cos), (np.cosh, cosh),
                              (np.sin, sin), (np.sinh, sinh),
                              (np.tan, tan), (np.tanh, tanh)):
         assert_(eq(plain(x), masked_fn(xm)))

     # Divide/invalid floating-point warnings are silenced for these three.
     with np.errstate(divide='ignore', invalid='ignore'):
         for plain, masked_fn in ((np.sqrt, sqrt), (np.log, log),
                                  (np.log10, log10)):
             assert_(eq(plain(abs(x)), masked_fn(xm)))

     assert_(eq(np.exp(x), exp(xm)))

     # Inverse trigonometric functions use z / zm.
     for plain, masked_fn in ((np.arcsin, arcsin), (np.arccos, arccos),
                              (np.arctan, arctan)):
         assert_(eq(plain(z), masked_fn(zm)))

     assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
     assert_(eq(np.absolute(x), absolute(xm)))

     # Binary comparisons on (x, y) versus (xm, ym).
     for plain, masked_fn in ((np.equal, equal), (np.not_equal, not_equal),
                              (np.less, less), (np.greater, greater),
                              (np.less_equal, less_equal),
                              (np.greater_equal, greater_equal)):
         assert_(eq(plain(x, y), masked_fn(xm, ym)))

     assert_(eq(np.conjugate(x), conjugate(xm)))

     # concatenate must accept any mix of plain and masked inputs.
     for pieces in ((xm, ym), (x, y), (xm, y)):
         assert_(eq(np.concatenate((x, y)), concatenate(pieces)))
     assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
Пример #40
0
 def test_testUfuncs1(self):
     # Compare masked-array ufunc wrappers against their numpy equivalents.
     (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d

     unary_on_x = [(np.cos, cos), (np.cosh, cosh), (np.sin, sin),
                   (np.sinh, sinh), (np.tan, tan), (np.tanh, tanh)]
     for reference, candidate in unary_on_x:
         assert_(eq(reference(x), candidate(xm)))

     # These three are evaluated with divide/invalid warnings suppressed.
     with np.errstate(divide='ignore', invalid='ignore'):
         unary_on_abs_x = [(np.sqrt, sqrt), (np.log, log), (np.log10, log10)]
         for reference, candidate in unary_on_abs_x:
             assert_(eq(reference(abs(x)), candidate(xm)))

     assert_(eq(np.exp(x), exp(xm)))

     unary_on_z = [(np.arcsin, arcsin), (np.arccos, arccos),
                   (np.arctan, arctan)]
     for reference, candidate in unary_on_z:
         assert_(eq(reference(z), candidate(zm)))

     assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
     assert_(eq(np.absolute(x), absolute(xm)))

     binary_on_xy = [(np.equal, equal), (np.not_equal, not_equal),
                     (np.less, less), (np.greater, greater),
                     (np.less_equal, less_equal),
                     (np.greater_equal, greater_equal)]
     for reference, candidate in binary_on_xy:
         assert_(eq(reference(x, y), candidate(xm, ym)))

     assert_(eq(np.conjugate(x), conjugate(xm)))

     # Masked concatenate with masked, plain and mixed operands.
     assert_(eq(np.concatenate((x, y)), concatenate((xm, ym))))
     assert_(eq(np.concatenate((x, y)), concatenate((x, y))))
     assert_(eq(np.concatenate((x, y)), concatenate((xm, y))))
     assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
Пример #41
0
    def __call__(self, value, clip=None):
        """Normalize ``value`` on a logarithmic scale between vmin and vmax.

        Parameters
        ----------
        value : scalar or iterable
            Data to normalize. Iterables are converted to a masked float
            array; a scalar is wrapped in a one-element array and unwrapped
            again before returning.
        clip : bool, optional
            If true, values are clipped to ``[vmin, vmax]`` before the
            transform (masked entries stay masked). Defaults to ``self.clip``.

        Returns
        -------
        Masked array (or a single element of it for scalar input) holding
        ``(log(value) - log(vmin)) / (log(vmax) - log(vmin))``, or zeros
        when ``vmin == vmax``.

        Raises
        ------
        ValueError
            If ``vmin > vmax`` or ``vmin <= 0``.
        """
        if clip is None:
            clip = self.clip

        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from current NumPy releases, so the builtin is used here.
        if cbook.iterable(value):
            vtype = 'array'
            val = ma.asarray(value).astype(float)
        else:
            vtype = 'scalar'
            val = ma.array([value]).astype(float)

        # autoscale_None runs before vmin/vmax are read from the instance.
        self.autoscale_None(val)
        vmin, vmax = self.vmin, self.vmax
        if vmin > vmax:
            raise ValueError("minvalue must be less than or equal to maxvalue")
        elif vmin <= 0:
            raise ValueError("values must all be positive")
        elif vmin == vmax:
            # Degenerate range: everything maps to 0. Fall through instead of
            # returning here so scalar input is unwrapped like in every other
            # branch.
            result = 0.0 * val
        else:
            if clip:
                # Fill masked entries before clipping, then restore the mask.
                mask = ma.getmask(val)
                val = ma.array(np.clip(val.filled(vmax), vmin, vmax),
                               mask=mask)
            result = (ma.log(val) - np.log(vmin)) / (np.log(vmax) - np.log(vmin))
        if vtype == 'scalar':
            result = result[0]
        return result
Пример #42
0
def dewpoint(e):
    r'''Calculate the ambient dewpoint given the vapor pressure.

    Parameters
    ----------
    e : array_like
        Water vapor partial pressure in mb

    Returns
    -------
    array_like
        Dew point temperature in degrees Celsius.

    See Also
    --------
    dewpoint_rh, saturation_vapor_pressure, vapor_pressure

    Notes
    -----
    This function inverts the Bolton 1980 [3] formula for saturation vapor
    pressure to instead calculate the temperature. This yields the following
    formula for dewpoint in degrees Celsius:

    .. math:: T = \frac{243.5 log(e / 6.112)}{17.67 - log(e / 6.112)}

    References
    ----------
    .. [3] Bolton, D., 1980: The Computation of Equivalent Potential
           Temperature. Mon. Wea. Rev., 108, 1046-1053.
    '''
    # Log of the vapor pressure relative to the module-level reference
    # pressure; it appears in both numerator and denominator.
    log_ratio = log(e / sat_pressure_0c)
    numerator = 243.5 * log_ratio
    return numerator / (17.67 - log_ratio)
Пример #43
0
def index(request, *args, **kwags):
	"""Render the classifier page and, on POST, classify the submitted article.

	A non-POST request, or a POST whose article has fewer than 20
	non-blank characters, renders the page with an empty ``article``
	value. A POST with an invalid form renders 'Invalid Input!'. Otherwise
	the article is turned into a feature vector, passed to
	``clsfyModel.predict`` and the resulting verdict string is rendered.
	"""
	template = 'classifier/index.html'
	if request.method != 'POST':
		return render(request, template, {"article": ""})

	s = 'Invalid Input!'
	form = ArForm(request.POST)
	if form.is_valid():
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print("VLAID DATA")
		data = form.cleaned_data['article']
		if len(data.strip()) < 20:
			return render(request, template, {"article": ""})

		# Word features and grammar features are flattened into one vector.
		dataW = fitWordArticle(data, vectorizer)
		dataG = createGrammarDictionary([data])
		dataF = np.concatenate((dataW[0], dataG[0]), axis=None)
		# Log of (percentage share + 1); entries masked by ma.log become 0.
		dataF = ma.log(dataF/dataF.sum()*100+1).filled(0)
		# The model is given a batch containing this single sample.
		outside = np.array([dataF])
		print("TESTER")
		print(outside.shape)
		ynew = clsfyModel.predict(outside)
		print(ynew)
		# The larger of the two scores of the first prediction decides.
		if ynew[0][0] > ynew[0][1]:
			s = "You are a Milton Paper Writer"
		else:
			s = "You are a Milton Measure Writer"
	return render(request, template, {"article": str(s)})
Пример #44
0
 def transform_non_affine(self, a):
     """Apply the symmetric-log transform to ``a``.

     Values with magnitude above ``self.linthresh`` are mapped to
     ``sign * linthresh * (_linscale_adj + log(|a| / linthresh) / _log_base)``;
     values inside ``[-linthresh, linthresh]`` are scaled linearly by
     ``self._linscale_adj``.
     """
     threshold = self.linthresh
     signs = np.sign(a)
     # Entries in the linear range are masked so the log skips them.
     outside = ma.masked_inside(a, -threshold, threshold, copy=False)
     log_term = ma.log(np.abs(outside) / threshold) / self._log_base
     scaled = signs * threshold * (self._linscale_adj + log_term)
     if not outside.mask.any():
         return scaled
     # Fill the masked (linear-range) slots with the linear mapping.
     return ma.where(outside.mask, a * self._linscale_adj, scaled)
Пример #45
0
 def transform(self, a):
     """Return the symmetric-log transform of ``a`` as a plain ndarray.

     Outside ``[-linthresh, linthresh]`` the result is
     ``sign(a) * log(|a|) / _log_base``; inside that interval it is
     ``a * _linadjust``.
     """
     limit = self.linthresh
     signs = np.sign(np.asarray(a))
     # Mask the linear range so the logarithm is only used outside it.
     outer = ma.masked_inside(a, -limit, limit, copy=False)
     scaled = signs * ma.log(np.abs(outer)) / self._log_base
     if not outer.mask.any():
         return np.asarray(scaled)
     return np.asarray(ma.where(outer.mask, a * self._linadjust, scaled))
Пример #46
0
 def transform(self, a):
     """Apply a natural-log symmetric-log transform to ``a``.

     Values inside ``[-linthresh, linthresh]`` are returned unchanged;
     outside it the result is
     ``sign(a) * linthresh * (1 + log(|a| / linthresh))``.
     """
     threshold = self.linthresh
     signs = np.sign(a)
     # Masked entries mark the linear range that is passed through as-is.
     beyond = ma.masked_inside(a, -threshold, threshold, copy=False)
     scaled = signs * threshold * (1 + ma.log(np.abs(beyond) / threshold))
     if not beyond.mask.any():
         return scaled
     return ma.where(beyond.mask, a, scaled)
Пример #47
0
 def findEout(self, numSamples=1000):
     """Estimate the average logistic loss of ``self.w`` on fresh samples.

     ``numSamples`` points ``(x, y)`` are drawn with
     ``self.createDataPoint()`` and the mean of
     ``log(1 + exp(-y * w.x))`` over them is returned.
     """
     samples = [self.createDataPoint() for _ in range(numSamples)]
     total = 0
     for x, y in samples:
         margin = multiply(y, np.dot(transpose(self.w), x))
         total += log(1 + exp(-1 * margin))
     return total / float(numSamples)
Пример #48
0
 def transform(self, a):
     """Return the symmetric-log transform of ``a`` as an ndarray.

     Inside ``[-linthresh, linthresh]`` values are multiplied by
     ``self._linscale``; outside it the result is
     ``sign(a) * (log(|a|) / _log_base + _linadjust)``.
     """
     a = np.asarray(a)
     signs = np.sign(a)
     limit = self.linthresh
     linear_zone = ma.masked_inside(a, -limit, limit, copy=False)
     if not linear_zone.mask.any():
         # Nothing falls in the linear range: plain numpy log is enough.
         return signs * (np.log(np.abs(a)) / self._log_base + self._linadjust)
     scaled = signs * (ma.log(np.abs(linear_zone)) / self._log_base + self._linadjust)
     return np.asarray(ma.where(linear_zone.mask, a * self._linscale, scaled))
Пример #49
0
def ln_shifted_auto(v):
    """Return the natural log of 'v', shifting it first when needed.

    If the smallest value of 'v' is <= 0, 'v' is shifted so that its
    minimum becomes 1 before the log is taken. Otherwise the log is done
    on the original 'v'.
    """
    # ma.min reduces over the whole array. The single-argument call
    # ma.minimum(v) used before is a deprecated form that newer NumPy
    # releases reject (ma.minimum is the element-wise two-argument function).
    vmin = ma.min(v)
    if vmin <= 0:
        values = v - vmin + 1
    else:
        values = v
    return ma.log(values)
Пример #50
0
def trainNB1(trainMatrix, trainCategory):
    """Train a two-class naive Bayes model and return log-probabilities.

    :param trainMatrix: one word-count vector per training document
    :param trainCategory: class label (1 or anything else) per document
    :return: (p0Vect, p1Vect, pAbusive) -- per-word log probabilities for
        the non-1 class and for class 1, and the fraction of documents
        labelled 1
    """
    numTrainDocs = len(trainMatrix)
    numWord = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)

    # Slot 0 collects every label other than 1, slot 1 collects label 1.
    # Counts start at 1 and denominators at 2 so no probability is zero.
    wordCounts = [ones(numWord), ones(numWord)]
    totals = [2.0, 2.0]
    for i, doc in enumerate(trainMatrix):
        slot = 1 if trainCategory[i] == 1 else 0
        wordCounts[slot] += doc
        totals[slot] += sum(doc)

    # Logs are returned so later products of probabilities become sums.
    p0Vect = log(wordCounts[0] / totals[0])
    p1Vect = log(wordCounts[1] / totals[1])
    return p0Vect, p1Vect, pAbusive
Пример #51
0
 def perform_tf_idf(self):
     """Rescale the matrix ``self.a`` in place with a TF-IDF style weight.

     Each entry ``a[i, j]`` whose column sum is non-zero becomes
     ``(a[i, j] / column_sum[j]) * log(cols / row_sum[i])``. Both sums are
     taken once, before any entry is modified. A message is printed for
     every entry that ends up infinite.
     """
     words_per_doc = sum(self.a, axis=0)  # column sums
     docs_per_word = sum(self.a, axis=1)  # row sums
     rows, cols = self.a.shape
     for i in range(rows):
         for j in range(cols):
             if words_per_doc[j] == 0:
                 # Empty column: leave the entry untouched.
                 continue
             self.a[i, j] = (self.a[i, j] / words_per_doc[j]) * log(float(cols) / docs_per_word[i])
             if isinf(self.a[i, j]):
                 # Parenthesised single-argument print works on Python 2 and 3.
                 print("Infinity found!")
Пример #52
0
def corr_proba(r, ndata, ndataset=2, dof=False):
    """Probability of rejecting correlations

    - **r**: Correlation coefficient
    - **ndata**: Number of records use for correlations
    - **ndataset**, optional:  Number of datasets (1 for autocorrelations, else 2) [default: 2]
    - **dof**, optional: If true, ``ndata`` is used directly as the number of
      degrees of freedom; otherwise ``ndata-2-ndataset`` is used [default: False]

    Returns a masked MV2 variable with id ``corr_proba``, expressed in
    percent, carrying the axes of ``r``.

    .. todo::

        This must be rewritten using :mod:`scipy.stats`
    """

    # Basic tests: mask empty samples and perfect correlations (|r| == 1),
    # which would make the 1/((1-r)(1+r)) term below blow up.
    ndata = MA.masked_equal(ndata,0,copy=0)
    r = MV2.masked_where(MA.equal(MA.absolute(r),1.),r,copy=0)

    # Degree of freedom
    if dof:
        df = ndata
    else:
        df = ndata-2-ndataset

    # Advanced test: prevent extreme values by locally decreasing the dof.
    # ``reduc`` is the per-element divisor applied to df. The loop repeats
    # while any unmasked z is <= -600 (masked_greater hides everything
    # above -600, count() counts what is left); each pass bumps the divisor
    # at the position of the current minimum of z.
    reduc = N.ones(r.shape)
    z = None
    while z is None or MA.count(MA.masked_greater(z,-600.)):
        if z is not None:
            imax = MA.argmin(z.ravel())
            reduc.flat[imax] += 1
        dfr = df/reduc
        t = r*MV2.sqrt(dfr/((1.0-r)* (1.0+r)))
        a = 0.5*dfr
        b = 0.5
        # NOTE(review): the numerator uses df while the denominator uses the
        # reduced dfr; the two only coincide where reduc == 1 -- confirm
        # this is intended.
        x = df/(dfr+t**2)
        z = _gammaln(a+b)-_gammaln(a)-_gammaln(b)+a*MA.log(x)+b*MA.log(1.0-x)

    # Perform the test and format the variable: betai(a, b, x) scaled to %.
    prob = MV2.masked_array(betai(a,b,x),axes=r.getAxisList())*100
    prob.id = 'corr_proba' ; prob.name = prob.id
    prob.long_name = 'Probability of rejection'
    prob.units = '%'

    return prob
Пример #53
0
  def __call__(self, value, clip=None):
    """Normalize ``value`` with a symmetric log scale centred on 0.5.

    Values within ``[-vin, vin]`` map linearly to ``0.5 +/- cin``; values
    above ``vin`` map logarithmically onto ``[0.5 + cin, 1]`` (reaching 1
    at ``vmax``) and values below ``-vin`` onto ``[0, 0.5 - cin]``
    (reaching 0 at ``vmin``).

    value : scalar or iterable of data to normalize; a scalar is wrapped
      in a one-element array and unwrapped again before returning.
    clip : if true, values are clipped to ``[vmin, vmax]`` first (masked
      entries stay masked). Defaults to ``self.clip``.

    Raises ValueError if ``vmin > vmax``, ``vmin > 0`` or ``vmax < 0``.
    """
    if clip is None:
      clip = self.clip

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from current NumPy releases, so the builtin is used here.
    if cbook.iterable(value):
      vtype = 'array'
      val = ma.asarray(value).astype(float)
    else:
      vtype = 'scalar'
      val = ma.array([value]).astype(float)

    # autoscale_None runs before vmin/vmax are read from the instance.
    self.autoscale_None(val)
    vmin, vmax = self.vmin, self.vmax
    vin, cin = self.vin, self.cin
    if vmin > vmax:
      raise ValueError("minvalue must be less than or equal to maxvalue")
    elif vmin > 0:
      raise ValueError("minvalue must be less than 0")
    elif vmax < 0:
      raise ValueError("maxvalue must be greater than 0")
    elif vmin == vmax:
      result = 0.0 * val
    else:
      if clip:
        # Fill masked entries before clipping, then restore the mask.
        mask = ma.getmask(val)
        val = ma.array(np.clip(val.filled(vmax), vmin, vmax),
                       mask=mask)
      # Split the data into the positive-log, negative-log and linear zones.
      ipos = (val > vin)
      ineg = (val < -vin)
      izero = ~(ipos | ineg)

      result = ma.empty_like(val)
      result[izero] = 0.5 + cin * val[izero] / vin
      result[ipos] = 0.5 + cin + (0.5 - cin) * \
                    (ma.log(val[ipos]) - np.log(vin)) / (np.log(vmax) - np.log(vin))
      result[ineg] = 0.5 - cin - (0.5 - cin) * \
                    (ma.log(-val[ineg]) - np.log(vin)) / (np.log(-vmin) - np.log(vin))
      result.mask = ma.getmask(val)
    if vtype == 'scalar':
      result = result[0]
    return result
Пример #54
0
def get_diff_jpdf_with_ini(A, P0, epsilon):
    """Find a joint distribution shaped like A by constrained minimization.

    Starting from P0, ``sum(x * log(x))`` is minimized with SLSQP over
    entries bounded to [0, 1], subject to the equality constraints returned
    by ``eq_cons(x, A, epsilon)`` (per the original description: same
    marginals as A and a cross entropy with A equal to epsilon).

    Returns the solution reshaped to ``A.shape``.
    """
    side, _ = A.shape

    def negative_entropy(x):
        return dot(x, log(x))

    def equality_constraints(x):
        return eq_cons(x, A, epsilon)

    start = P0.reshape(-1,)
    # One [0, 1] box constraint per entry of the side x side matrix.
    box = [[0, 1]] * side * side
    solution = fmin_slsqp(func=negative_entropy,
                          x0=start,
                          f_eqcons=equality_constraints,
                          bounds=box)
    return solution.reshape(A.shape)
Пример #55
0
def main(tab_fname=None):
  """Write a natural-log-scaled copy of a tab-delimited matrix file.

  The matrix is read from ``tab_fname`` through ``name_iter`` (which is
  handed ``varlist``; presumably it fills it with the row names -- verify
  against its definition). Missing and zero entries are masked, the log is
  taken, and the result is written to ``tab_fname + ".logscale.tab"`` with
  ``varlist[i]`` as the first column of row ``i``.
  """
  varlist = []
  # Context managers close both files even if parsing or writing fails.
  with open(tab_fname) as fp_in:
    M = np.genfromtxt(name_iter(fp_in, varlist), usemask=True, delimiter='\t')
  # Zeros are masked along with missing values so the log is defined.
  Q = ma.MaskedArray(data=M.data, mask=(M.mask|(M.data == 0)))
  Q = ma.log(Q)
  # save matrix back to .tab format
  with open(tab_fname + ".logscale.tab", "w") as fp:
    for i, row in enumerate(Q):
      fp.write(varlist[i] + '\t')
      fp.write('\t'.join(map(tostr, row)))
      fp.write('\n')
Пример #56
0
def dewpoint(e):
    """
    Calculate the ambient dewpoint given the vapor pressure.

    e : scalar or array
        The water vapor partial pressure in mb.

    Returns : scalar or array
        The dew point temperature in degrees Celsius, with the shape
        of the result being determined using numpy's broadcasting rules.
    """
    # Log of the pressure relative to the module-level reference pressure;
    # the same term appears in the numerator and the denominator.
    log_ratio = log(e / sat_pressure_0c)
    scaled = 243.5 * log_ratio
    return scaled / (17.67 - log_ratio)
Пример #57
0
def costFunction_Regular(theta, *args):
    """Return the regularized cross-entropy cost for the two-layer model.

    theta : flat parameter vector; ``getArgs(theta, args)`` turns it and
        ``args`` into ``theta1, theta2, dataX, K, dataY, m``.
    args : extra positional arguments forwarded to ``getArgs``.

    The cost is ``-(1/m) * sum_k [y_k . log(h_k) + (1 - y_k) . log(1 - h_k)]``
    over the K output columns, plus the term returned by
    ``getRegularTerm``. The value is also printed on every call.
    """
    theta1, theta2, dataX, K, dataY, m = getArgs(theta, args)
    hx = getPredictRes(dataX, theta1, theta2)  # one column per class
    jtheta = 0
    # K is the number of classes, m the number of training samples.
    # A loop over classes is used instead of one big matrix product because
    # only the diagonal of that product would be needed, which wastes time
    # and memory when m is large. K is normally much smaller than m, so each
    # class is handled with a vectorized dot product over the m samples and
    # the Python loop only runs K times.
    for i in range(K):  # dataY[:, 0] holds the labels of the first class
        Ak = -1 * dot(dataY[:, i].T, log(hx[:, i]))
        Bk = -1 * dot((1 - dataY[:, i]).T, log(1 - hx[:, i]))
        jtheta += Ak + Bk
    jtheta = jtheta * 1.0 / m
    # NOTE(review): ``lamda`` is not bound anywhere in this function (it is
    # not among the values returned by getArgs); this only works if a
    # module-level ``lamda`` exists -- confirm.
    reguTerm = getRegularTerm(theta1, theta2, lamda, m)
    jtheta_Regular = jtheta + reguTerm
    # %-formatting keeps the output of the former two-argument print
    # statement while staying valid on both Python 2 and Python 3.
    print("#################jtheta_Regular %s" % (jtheta_Regular,))
    return jtheta_Regular
Пример #58
0
def _get_gamma_cdf(aseries, condition):
    """
    Returns the CDF values for aseries.

    A gamma distribution is fitted per column on the non-zero values, and
    the resulting CDF is mixed with the observed proportion of zeros.

    Parameters
    ----------
    aseries : TimeSeries
        Annual series of data (one column per period)
    condition : TimeSeries
        Period mask.

    Returns
    -------
    gcdf
        ``pzero + (1 - pzero) * gamma.cdf(aseries, alpha, scale=beta)``,
        evaluated on the original (unmasked-for-zeros) ``aseries``.
    """
    # Mask the months for which no precipitations were recorded
    aseries_ = ma.masked_values(aseries, 0)
    # Get the proportion of 0 precipitation for each period (MM/WW):
    # one minus (non-zero valid count / valid count), column by column.
    pzero = 1. - aseries_.count(axis=0) / aseries.count(axis=0).astype(float)
    # Mask outside the reference period. This ORs condition's data into the
    # private mask in place, so it only affects the fit below, not pzero.
    aseries_._mask |= condition._data
    meanrain = aseries_.mean(axis=0)
    # log(mean) - mean(log) of the fitted sample.
    aleph = ma.log(meanrain) - ma.log(aseries_).mean(axis=0)
    # Shape estimate from aleph (this has the form of Thom's approximation
    # to the gamma maximum-likelihood shape -- TODO confirm), then the scale
    # chosen so that alpha * beta equals the sample mean.
    alpha = (1. + ma.sqrt(1.+4./3*aleph)) / (4.*aleph)
    beta = meanrain/alpha
    # Get the Gamma CDF (per month)
    gcdf = pzero + (1.-pzero) * ssd.gamma.cdf(aseries,alpha,scale=beta)
    return gcdf
Пример #59
0
def geoMean(array):
    '''
    Generate the geometric mean of a list or array,
    removing all zero-values but retaining total length

    A pandas DataFrame is converted to its underlying array first. Zeros
    are masked rather than dropped, and the mean is taken over all
    remaining elements (no axis), so a single value is returned.
    '''
    if isinstance(array, pandas.DataFrame):
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same ndarray and is available on old and new pandas alike.
        array = array.values

    non_zero = ma.masked_values(array, 0)
    log_a = ma.log(non_zero)
    geom_mean = ma.exp(log_a.mean())

    return geom_mean