Example No. 1
 def test_betai(self):
     np.random.seed(12345)
     for i in range(10):
         a = np.random.rand() * 5.
         b = np.random.rand() * 200.
         assert_equal(stats.betai(a, b, 0.), 0.)
         assert_equal(stats.betai(a, b, 1.), 1.)
         assert_equal(stats.mstats.betai(a, b, 0.), 0.)
         assert_equal(stats.mstats.betai(a, b, 1.), 1.)
         x = np.random.rand()
         assert_almost_equal(stats.betai(a, b, x),
                             stats.mstats.betai(a, b, x), decimal=13)
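These tests check stats.betai against its masked-array counterpart stats.mstats.betai at the endpoints 0 and 1 and at a random interior point. Note that betai was deprecated in SciPy (around 0.17) and removed in later releases; the same regularized incomplete beta function is available as scipy.special.betainc. A minimal sketch of the equivalent checks, with illustrative parameter values:

import numpy as np
from scipy import special

a, b = 2.5, 100.0                      # illustrative shape parameters
x = np.random.rand()
print(special.betainc(a, b, 0.0))      # 0.0
print(special.betainc(a, b, 1.0))      # 1.0
print(special.betainc(a, b, x))        # I_x(a, b), what betai(a, b, x) returned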
Example No. 3
 def test_betai(self):
     """ test incomplete beta function """
     for i in range(10):
         a = np.random.rand()*5.
         b = np.random.rand()*200.
         assert_equal(stats.betai(a,b,0.),0.)
         assert_equal(stats.betai(a,b,1.),1.)
         assert_equal(stats.mstats.betai(a,b,0.),0.)
         assert_equal(stats.mstats.betai(a,b,1.),1.)
     for i in range(10):
         a = np.random.rand()*5.
         b = np.random.rand()*200.
         x = np.random.rand()
         assert_equal(stats.betai(a,b,x),stats.mstats.betai(a,b,x))
Example No. 4
    def test_betai(self):
        np.random.seed(12345)
        for i in range(10):
            a = np.random.rand() * 5.
            b = np.random.rand() * 200.

            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=DeprecationWarning)
                assert_equal(stats.betai(a, b, 0.), 0.)
                assert_equal(stats.betai(a, b, 1.), 1.)
                assert_equal(stats.mstats.betai(a, b, 0.), 0.)
                assert_equal(stats.mstats.betai(a, b, 1.), 1.)
                x = np.random.rand()
                assert_almost_equal(stats.betai(a, b, x),
                                    stats.mstats.betai(a, b, x), decimal=13)
Example No. 6
 def get_pearsons_ps(pcorrel):
     from scipy.stats import betai
     df = pcorrel.shape[0]
     ix, iy = np.diag_indices_from(pcorrel)
     pcorrel[ix, iy] -= 1e-7
     t_sq = pcorrel**2 * (df / ((1.0 - pcorrel) * (1.0 + pcorrel)))
     return betai(0.5 * df, 0.5, df / (df + t_sq))
Example No. 7
def pearsonr(x, y):
    """
    generalized from scipy.stats.pearsonr
    """
    # x and y should have same length.

    x_shape = x.shape
    if len(x_shape) > 1:
        x = x.reshape((x_shape[0], prod(x_shape[1:])))

    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    mx = x.mean(0)
    my = y.mean(0)
    xm, ym = x - mx, y - my

    r_num = n * np.dot(xm.T, ym)
    r_den = n * np.sqrt(np.outer(ss(xm), ss(ym, 0)))

    r = (r_num / r_den)

    # Presumably, if r > 1, then it is only some small artifact of floating
    # point arithmetic.
    r = np.minimum(r, 1.0)
    df = n - 2

    # Use a small floating point value to prevent divide-by-zero nonsense
    # fixme: TINY is probably not the right value and this is probably not
    # the way to be robust. The scheme used in spearmanr is probably better.
    TINY = 1.0e-20
    t = r * np.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
    prob = betai(0.5 * df, 0.5, df / (df + t * t))

    return r, prob
Example No. 8
def p_adj_map_from_scores(r, n=3539):
    '''Creates a p map with adjusted p values from scores (correlations)'''
    from scipy.stats import betai
    df = n-2
    t_squared = r*r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5*df, 0.5, df / (df+t_squared))
    return fdrcorrection0(prob)
Example No. 9
def pearson_corr(x, field):
    """Pearson correlation with 2-sided t-test

    Parameters:
        x: ndarray
            A 1D array time series.
        field: ndarray
            A 3D array of field values. The first dimension of the array needs 
            to be time.

    Returns: (ndarray, ndarray)
        Two ndarrays. A 2D array of Pearson correlation values and a 2D array of p-values.

    Notes:
        The p-values returned by this function are from a two-sided Student's 
        t-distribution. The test is against the null hypothesis that the 
        correlation is not significantly different from "0".
        This function could use some more work.
    """
    field = field.copy()
    f_oldshape = field.shape
    field.shape = (f_oldshape[0], f_oldshape[1] * f_oldshape[2])
    n = len(x)
    df = n - 2
    r = ((x[:, np.newaxis] * field).sum(axis=0) -
         n * x.mean() * field.mean(axis=0)) / (
             np.sqrt(np.sum(x**2) - n * x.mean()**2) *
             np.sqrt(np.sum(field**2, axis=0) - n * field.mean(axis=0)**2))
    t = r * np.sqrt(df / (1 - r**2))
    p = stats.betai(0.5 * df, 0.5, df / (df + t * t))
    r.shape = (f_oldshape[1], f_oldshape[2])
    p.shape = r.shape
    return r, p
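A hedged usage sketch for the function above (not from the original source; it assumes pearson_corr is in scope together with the module's np/stats imports, and that stats.betai is still available or has been shimmed with scipy.special.betainc):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=120)                  # 1D time series
field = rng.normal(size=(120, 10, 12))    # 3D field: time x lat x lon
field += 0.5 * x[:, None, None]           # inject a signal correlated with x
r, p = pearson_corr(x, field)
print(r.shape, p.shape)                   # both (10, 12)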
Example No. 10
def adjust_r(r, n=3539, **fdr_params):
    from statsmodels.sandbox.stats.multicomp import fdrcorrection0
    from scipy.stats import betai
    df = n - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return fdrcorrection0(prob)
Example No. 11
def peak2sigma(psdpeak,n0):
    """ translates a psd peak height into a multi-trial NULL-hypothesis probability
    NOTE: dstarr replaces '0' with 0.000001 to catch float-point accuracy bugs
          Which I otherwise stumble into.
    """

    # Student's-T
    prob0 = betai( 0.5*n0-2.,0.5,(n0-1.)/(n0-1.+2.*psdpeak) )
    if (0.5*n0-2. <= 0.000001):
        lprob0 = 0.
    elif ((n0-1.)/(n0-1.+2.*psdpeak) <= 0.000001):
        lprob0 = -999.
    elif (prob0 == 0):
        lprob0 = ((0.5*n0-2.)*log((n0-1.)/(n0-1.+2.*psdpeak))
                  - log(0.5*n0-2.) - betaln(0.5*n0-2., 0.5))
    else:
        lprob0 = log(prob0)

    # ballpark number of independent frequencies
    #  (Horne and Baliunas, eq. 13)
    horne = long(-6.362+1.193*n0+0.00098*n0**2.)
    if (horne <= 0): horne = 5

    if (lprob0 > log(1.e-4) and prob0 > 0):
        # trials correction, monitoring numerical precision
        lprob = log(1. - exp(horne*log(1-prob0)))
    elif (lprob0 + log(horne) > log(1.e-4) and prob0 > 0):
        lprob = log(1. - exp(-horne*prob0))
    else:
        lprob = log(horne) + lprob0

    sigma = lprob2sigma(lprob)

    return sigma
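The branchy block above is a trials correction: given a single-trial tail probability prob0 and roughly horne independent frequencies, the multi-trial false-alarm probability is 1 - (1 - prob0)**horne, evaluated in log space when it would underflow. A small self-contained sketch of that identity (numbers purely illustrative):

import numpy as np

prob0, horne = 1.0e-6, 120
fap = -np.expm1(horne * np.log1p(-prob0))   # 1 - (1 - prob0)**horne, stable for tiny prob0
print(fap)                                  # close to horne * prob0 when prob0 is small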
Example No. 12
def p_map_from_scores(r, n=3539):
    '''Creates a p map from scores (correlations)'''
    from scipy.stats import betai
    df = n-2
    t_squared = r*r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5*df, 0.5, df / (df+t_squared))
    return prob
Example No. 13
def pearsonr(x, y):
    """
    generalized from scipy.stats.pearsonr
    """
    # x and y should have same length.
    
    x_shape = x.shape
    if len(x_shape) > 1:
        x = x.reshape((x_shape[0],prod(x_shape[1:])))

    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    mx = x.mean(0)
    my = y.mean(0)
    xm, ym = x-mx, y-my

    r_num = n*np.dot(xm.T,ym)
    r_den = n*np.sqrt(np.outer(ss(xm),ss(ym,0)))

    r = (r_num / r_den)
    
    # Presumably, if r > 1, then it is only some small artifact of floating
    # point arithmetic.
    r = np.minimum(r, 1.0)
    df = n-2

    # Use a small floating point value to prevent divide-by-zero nonsense
    # fixme: TINY is probably not the right value and this is probably not
    # the way to be robust. The scheme used in spearmanr is probably better.
    TINY = 1.0e-20
    t = r*np.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = betai(0.5*df,0.5,df/(df+t*t))

    return r,prob
Example No. 14
File: misc.py Project: srcole/tools
def pearsonp(r, n):
    from scipy.stats import betai
    if abs(r) == 1:
        return 0
    else:
        df = n-2
        t_squared = r*r * (df / ((1.0 - r) * (1.0 + r)))
        return betai(0.5*df, 0.5, df / (df + t_squared))
Example No. 15
def p_from_r(r, n):
    r = max(min(r, 1.0), -1.0)
    df = n - 2
    if abs(r) == 1.0:
        prob = 0.0
    else:
        t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
        prob = stats.betai(0.5 * df, 0.5, df / (df + t_squared))
    return prob
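Most of the snippets on this page convert a Pearson r into a two-sided p-value through the identity p = betai(df/2, 1/2, df/(df + t**2)) with t = r*sqrt(df/(1 - r**2)). The same number comes from the t distribution's survival function, which is the usual route on SciPy versions where betai no longer exists. A self-contained sketch (values chosen for illustration):

import numpy as np
from scipy import special, stats

r, n = 0.42, 30                       # illustrative correlation and sample size
df = n - 2
t = r * np.sqrt(df / (1.0 - r * r))   # t statistic for H0: rho = 0

p_beta = special.betainc(0.5 * df, 0.5, df / (df + t * t))   # incomplete-beta form
p_t = 2.0 * stats.t.sf(abs(t), df)                           # two-sided t-test form
print(p_beta, p_t)                                           # agree to floating-point precision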
Example No. 16
def corrcoef(matrix):
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
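An illustrative call of the helper above (assumes the corrcoef defined here is in scope, with np imported and a betai-compatible function available, e.g. from scipy.special import betainc as betai on recent SciPy):

import numpy as np

data = np.random.default_rng(0).normal(size=(4, 50))   # 4 variables, 50 observations each
r, p = corrcoef(data)
print(r.shape, p.shape)   # both (4, 4); the diagonal of p is set to 1.0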
Example No. 17
def p_map_from_predictions(preds_pc, data_to_map):
    '''Creates a p map from predictions'''
    from sklearn.preprocessing import StandardScaler
    from scipy.stats import betai
    mx = StandardScaler().fit_transform(preds_pc)
    my = StandardScaler().fit_transform(data_to_map)
    n = mx.shape[0]
    r = (1/(n-1))*((mx*my).sum(axis=0))
    df = n-2
    t_squared = r*r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5*df, 0.5, df / (df+t_squared))
    return prob
Example No. 18
def corrcoef_matrix(matrix):
    # Code originating from http://stackoverflow.com/a/24547964 by http://stackoverflow.com/users/2455058/jingchao

    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
Example No. 19
def check_sample_mean(sm, v, n, popmean):
    # from stats.stats.ttest_1samp(a, popmean):
    # Calculates the t-obtained for the independent samples T-test on ONE group
    # of scores a, given a population mean.
    #
    # Returns: t-value, two-tailed prob
    df = n - 1
    svar = ((n - 1) * v) / float(df)  # looks redundant
    t = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    prob = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # return t,prob
    npt.assert_(prob > 0.01, "mean fail, t,prob = %f, %f, m, sm=%f,%f" % (t, prob, popmean, sm))
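The betai expression in this helper reproduces the two-sided p-value of a one-sample t-test, which SciPy also exposes directly as stats.ttest_1samp. A hedged cross-check (not part of the original test; the sample is illustrative):

import numpy as np
from scipy import special, stats

rng = np.random.default_rng(12345)
a = rng.normal(loc=0.1, scale=1.0, size=50)    # illustrative sample
t, p = stats.ttest_1samp(a, popmean=0.0)

df = len(a) - 1
p_beta = special.betainc(0.5 * df, 0.5, df / (df + t * t))
print(p, p_beta)   # agree to floating-point precision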
Example No. 20
def check_sample_mean(sm,v,n, popmean):
    # from stats.stats.ttest_1samp(a, popmean):
    # Calculates the t-obtained for the independent samples T-test on ONE group
    # of scores a, given a population mean.
    #
    # Returns: t-value, two-tailed prob
    df = n-1
    svar = ((n-1)*v) / float(df)    # looks redundant
    t = (sm-popmean) / np.sqrt(svar*(1.0/n))
    prob = stats.betai(0.5*df, 0.5, df/(df+t*t))

    # return t,prob
    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m, sm=%f,%f' %
            (t, prob, popmean, sm))
Example No. 21
def correlation_matrix_vector(matrix, vector):
    '''Pearson correlation of each column of `matrix` (shape T, N) with `vector` (shape T).'''
    r = np.ones(shape=(matrix.shape[0]))
    p = np.ones(shape=(matrix.shape[0]))
    nt = matrix.shape[0]  #Time dimension
    data1_norm = (matrix - matrix.mean(axis=0)) / matrix.std(axis=0)
    data2_norm = (vector - vector.mean()) / vector.std()
    r = np.sum(np.swapaxes(data1_norm, 0, 1) * data2_norm / float(nt), axis=1)
    df = nt - 2  #DOF
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    p = betai(0.5 * df, 0.5, df / (df + t_squared))
    return r, p
Example No. 22
def corrcoef(matrix):
    """
    Received code from following link:
    http://stackoverflow.com/questions/24432101/correlation-coefficients-and-p-v
    alues-for-all-pairs-of-rows-of-a-matrix
    """
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
Example No. 23
def chi2sigma(chi0,chi1,nu0,nharm):
    from scipy.stats import betai
    from scipy.special import betaln

    nu1 = nu0 - 2.*nharm
    dfn = nu0-nu1
    dfd = nu1
    sigma = 0.
    if (dfn>0 and dfd>0 and chi0>chi1):
        fstat = (chi0/chi1-1.)*dfd/dfn
        prob = betai( dfd/2., dfn/2., dfd/(dfd+dfn*fstat) )
        if (dfd<=0 or dfn<=0): lprob=0.
        elif (chi1==0): lprob=-999.
        elif (prob==0): lprob = 0.5*dfd*log( dfd/(dfd+dfn*fstat) )-log(dfd/2.)-betaln(dfd/2.,dfn/2.)
        else: lprob = log(prob)
        sigma = lprob2sigma(lprob)

    return sigma
Example No. 24
def check_sample_mean(sm,v,n, popmean):
    """
from stats.stats.ttest_1samp(a, popmean):
Calculates the t-obtained for the independent samples T-test on ONE group
of scores a, given a population mean.

Returns: t-value, two-tailed prob
"""
##    a = asarray(a)
##    x = np.mean(a)
##    v = np.var(a, ddof=1)
##    n = len(a)
    df = n-1
    svar = ((n-1)*v) / float(df)    #looks redundant
    t = (sm-popmean)/np.sqrt(svar*(1.0/n))
    prob = stats.betai(0.5*df,0.5,df/(df+t*t))

    #return t,prob
    assert prob>0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % (t,prob,popmean,sm)
Example No. 25
def check_sample_mean(sm,v,n, popmean):
    """
from stats.stats.ttest_1samp(a, popmean):
Calculates the t-obtained for the independent samples T-test on ONE group
of scores a, given a population mean.

Returns: t-value, two-tailed prob
"""
##    a = asarray(a)
##    x = np.mean(a)
##    v = np.var(a, ddof=1)
##    n = len(a)
    df = n-1
    svar = ((n-1)*v) / float(df)    # looks redundant
    t = (sm-popmean)/np.sqrt(svar*(1.0/n))
    prob = stats.betai(0.5*df,0.5,df/(df+t*t))

    # return t,prob
    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % (t,prob,popmean,sm))
Example No. 26
def reciprocity(G, nbunch = None,  weight = None):
    
    if nbunch is not None:
        nodes = np.sort(G.nodes())
        nbunch = np.sort(nbunch)
        fnodes = np.setdiff1d(nodes,nbunch)
        nodelist = np.append(nbunch, fnodes)
    else:
        nbunch = G.nodes()
        nodelist = G.nodes()
        fnodes = list()
    
    W = np.array(nx.to_numpy_matrix(G, nodelist = nodelist))
    indices = np.diag_indices_from(W)
    W[indices] = 0.
    
    if weight is None:
        
        W = 1. * (W > 0)

    l = float(W.sum())
    n = len(nbunch)
    m = len(fnodes)
    df = n  * (n - 1  + 2 * m)
    a = l / df # this is to take into account that the maximal number of observable links is lower than (n+m)*(n+m-1)
    W[n:,n:] = a # this is to set the terms corresponding to unobserved exposures to zero

    l2 = (W**2).sum()
    omega =  l2 / l
        
    rho = (W * W.T).sum() / l
    rho = (rho - a) / (omega - a)
    
    rho = max(min(rho, 1.0), - 1.0)

    if abs(rho) == 1.0:
        prob = 0.0
    else:
        t_squared = rho * rho * (df / ((1.0 - rho) * (1.0 + rho)))
        prob = betai(0.5*df, 0.5, df / (df + t_squared))    
    
    return rho,prob
Example No. 27
def chi2sigma(chi0, chi1, nu0, nharm):
    from scipy.stats import betai
    from scipy.special import betaln

    nu1 = nu0 - 2. * nharm
    dfn = nu0 - nu1
    dfd = nu1
    sigma = 0.
    if (dfn > 0 and dfd > 0 and chi0 > chi1):
        fstat = (chi0 / chi1 - 1.) * dfd / dfn
        prob = betai(dfd / 2., dfn / 2., dfd / (dfd + dfn * fstat))
        if (dfd <= 0 or dfn <= 0): lprob = 0.
        elif (chi1 == 0): lprob = -999.
        elif (prob == 0):
            lprob = 0.5 * dfd * log(dfd / (dfd + dfn * fstat)) - log(
                dfd / 2.) - betaln(dfd / 2., dfn / 2.)
        else:
            lprob = log(prob)
        sigma = lprob2sigma(lprob)

    return sigma
Example No. 28
    def genGBM(self, alpha, beta, mu, sigma):
        """This function produces a time-series based on Geometric Brownian
        Motion (GBM), filtered through a beta distribution for scaling.
        ***THIS NEEDS WORK***
        """
        
        self.prices = [np.random.binomial(1,.5)]
        t = np.arange(0,1,step=.01)
        S0 = np.random.random()
        Wt = np.cumsum(np.random.randn(100))
        
        signal = S0 * np.exp((mu-sigma**2/2)*t + sigma*Wt)
        res = stats.betai(alpha, beta, abs(signal/max(signal)))
        
        for i in res:
            self.prices.append(i)

        self.prices = list(reversed(self.prices))
        self.prices = np.around(self.prices,decimals=2)
        self.pricesNO = [abs(1-x) for x in self.prices]
        self.pricesNO = np.around(self.pricesNO,decimals=2)
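A side note on the scaling step above: betai(alpha, beta, x) is the regularized incomplete beta function, i.e. the CDF of a Beta(alpha, beta) distribution, so the same filtering can be written with scipy.stats.beta.cdf. A minimal sketch with illustrative parameters:

import numpy as np
from scipy import special, stats

alpha, beta_ = 2.0, 5.0
u = np.linspace(0.0, 1.0, 5)
print(special.betainc(alpha, beta_, u))   # regularized incomplete beta I_u(alpha, beta_)
print(stats.beta.cdf(u, alpha, beta_))    # identical values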
Example No. 29
def correlation_2_arrays(data1, data2, axis=0):
    '''Pointwise Pearson correlation of two equally shaped arrays along `axis`.'''
    r = np.ones(shape=(data1.shape[0]))
    p = np.ones(shape=(data1.shape[0]))
    nt = data1.shape[axis]
    assert data1.shape == data2.shape
    view1 = data1
    view2 = data2

    if axis:
        view1 = np.rollaxis(data1, axis)
        view2 = np.rollaxis(data2, axis)

    data1_norm = (view1 - data1.mean(axis=axis)) / data1.std(axis=axis)
    data2_norm = (view2 - data2.mean(axis=axis)) / data2.std(axis=axis)
    r = np.sum(data1_norm * data2_norm / float(nt), axis=0)

    df = nt - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    p = betai(0.5 * df, 0.5, df / (df + t_squared))
    return r, p
Example No. 30
 def genVarGamma_beta(self, alpha, beta, mu, sigma, theta, nu, plot=False):
     """Generates 100 random variables that are variance gamma distriuted and
     then filtered through a beta distribution for scaling.
     
     ***THIS SEEMS TO WORK***
     """
     
     t = np.arange(0,1,step=.01)
     self.prices = [np.random.binomial(1,.5)]   
     
     signal = vg.rnd(100, mu, sigma, theta, nu)
     res = stats.betai(alpha, beta, np.abs(signal))
     
     for i in res:
         self.prices.append(i)
         
     self.prices = list(reversed(self.prices))
     self.prices = np.around(self.prices,decimals=2)
     self.pricesNO = [abs(1-x) for x in self.prices]
     self.pricesNO = np.around(self.pricesNO,decimals=2)
     
     if plot == True:
         plt.plot(self.prices)
Example No. 31
    def genABM_beta(self, alpha, beta, mu, sigma, plot=False):
        """This function produces a time-series based on Arithmetic Brownian
        Motion (ABM), filtered through a beta distribution for scaling.
        
        ***THIS NEEDS WORK***
        """

        t = np.arange(0,1,step=.01)
        self.prices = [np.random.binomial(1,.5)]
        Wt = np.cumsum(np.random.randn(100))
        
        signal = self.prices[0] + ((mu-sigma**2/2)*t + sigma*Wt)
        res = stats.betai(alpha, beta, abs(signal/max(signal)))
        
        for i in res:
            self.prices.append(i)
                
        self.prices = list(reversed(self.prices))
        self.prices = np.around(self.prices,decimals=2)
        self.pricesNO = [abs(1-x) for x in self.prices]
        self.pricesNO = np.around(self.pricesNO,decimals=2)
    
        if plot == True:
            plt.plot(self.prices)
Example No. 32
def peak2sigma(psdpeak, n0):
    """ translates a psd peak height into a multi-trial NULL-hypothesis probability
    NOTE: dstarr replaces '0' with 0.000001 to catch float-point accuracy bugs
          Which I otherwise stumble into.
    """

    # Student's-T
    prob0 = betai(0.5 * n0 - 2., 0.5, (n0 - 1.) / (n0 - 1. + 2. * psdpeak))
    if (0.5 * n0 - 2. <= 0.000001):
        lprob0 = 0.
    elif ((n0 - 1.) / (n0 - 1. + 2. * psdpeak) <= 0.000001):
        lprob0 = -999.
    elif (prob0 == 0):
        lprob0 = (0.5 * n0 - 2.) * log(
            (n0 - 1.) /
            (n0 - 1. + 2. * psdpeak)) - log(0.5 * n0 - 2.) - betaln(
                0.5 * n0 - 2., 0.5)
    else:
        lprob0 = log(prob0)

    # ballpark number of independent frequencies
    #  (Horne and Baliunas, eq. 13)
    horne = long(-6.362 + 1.193 * n0 + 0.00098 * n0**2.)
    if (horne <= 0): horne = 5

    if (lprob0 > log(1.e-4) and prob0 > 0):
        # trials correction, monitoring numerical precision
        lprob = log(1. - exp(horne * log(1 - prob0)))
    elif (lprob0 + log(horne) > log(1.e-4) and prob0 > 0):
        lprob = log(1. - exp(-horne * prob0))
    else:
        lprob = log(horne) + lprob0

    sigma = lprob2sigma(lprob)

    return sigma
Example No. 33
def f_test_probability(N, p1, Chi2_1, p2, Chi2_2):
    """Return F-Test probability that the simpler model is correct.

      e.g. p1 = 5.; //number of PPM parameters
      e.g. p2 = p1 + 7.; // number of PPM + orbital parameters

    :param N: int
        Number of data points
    :param p1: int
        Number of parameters of the simpler model
    :param Chi2_1: float
        chi^2 corresponding to the simpler model
    :param p2: int
        Number of parameters of the model with more parameters
        p2 > p1
    :param Chi2_2: float
        chi^2 corresponding to the model with more parameters
    :return:
        prob: float
        probability

    """

    nu1 = p2 - p1
    nu2 = N - p2  # degrees of freedom

    if (Chi2_1 < Chi2_2):
        raise RuntimeWarning('Solution better with less parameters')

    # F test
    F0 = nu2 / nu1 * (Chi2_1 - Chi2_2) / Chi2_2

    # probability
    prob = betai(0.5 * nu2, 0.5 * nu1, nu2 / (nu2 + F0 * nu1))

    return prob
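The betai call above is the survival function of the F distribution: P(F > F0) = betainc(nu2/2, nu1/2, nu2/(nu2 + nu1*F0)), which SciPy also provides as stats.f.sf(F0, nu1, nu2). A small sketch of that equivalence (degrees of freedom and F value are illustrative):

from scipy import special, stats

nu1, nu2, F0 = 7.0, 40.0, 2.3
p_beta = special.betainc(0.5 * nu2, 0.5 * nu1, nu2 / (nu2 + nu1 * F0))
p_f = stats.f.sf(F0, nu1, nu2)
print(p_beta, p_f)   # agree to floating-point precision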
Example No. 34
  def doCorrelationIDR(self,ID,layer1,layer2):

    # first get stats for each layer
    [layer1sum, layer1n]=self.sumLayer(layer1[0],layer1[1])
    [layer2sum, layer2n]=self.sumLayer(layer2[0],layer2[1])
    layer1mean=layer1sum/layer1n
    layer2mean=layer2sum/layer2n

    # get layer extents based on first layer
    xMin=layer1[0].extent().xMinimum()
    xMax=layer1[0].extent().xMaximum()
    yMin=layer1[0].extent().yMinimum()
    yMax=layer1[0].extent().yMaximum()
    xDim=layer1[0].width()
    yDim=layer1[0].height()
    xSize=(xMax-xMin)/float(xDim)
    ySize=(yMax-yMin)/float(yDim)

    # initialise summing variables
    [mySum,mySumz1m,mySumz2m,myN]=[0,0,0,0]
    myNDV=QString(u'null (no data)')
    myOE=QString(u'out of extent')

    # loop through pixels in first layer
    for i in range(xDim):
      x=xMin+(xSize/2)+(i*xSize)
      for j in range(yDim):
        y=yMin+(ySize/2)+(j*ySize)

        # fetch values for this point
        z1=layer1[0].identify(QgsPoint(x,y))[1].values()[layer1[1]]
        z2=layer2[0].identify(QgsPoint(x,y))[1].values()[layer2[1]]

        # only consider where both grids are valid
        if not (z1==myNDV or z1==myOE or z2==myNDV or z2==myOE):
          z1=float(z1)
          z2=float(z2)
          myN+=1
          if ID=="I":
            mySum+=pow(pow(z1/layer1sum,0.5)-pow(z2/layer2sum,0.5),2)
          elif ID=="D":
            mySum+=abs(z1/layer1sum - z2/layer2sum)
          elif ID=="R":
            z1m=z1-layer1mean
            z2m=z2-layer2mean
            mySum+=z1m*z2m
            mySumz1m+=pow(z1m,2)
            mySumz2m+=pow(z2m,2)
    
    [myCor,myP]=[None,None]
    # final calculations
    if ID=="I":
      myCor= 1 - (0.5 * pow(mySum,0.5))
      myP=None
    elif ID=="D":
      myCor= 1 - (0.5 * mySum)
      myP=None
    elif ID=="R":
      if mySumz1m*mySumz1m>0:
        myCor= mySum / (pow(mySumz1m,0.5)*pow(mySumz2m,0.5))
        myDF=myN-2
        myPprelim=myCor*pow(myDF/((1-myCor)*(1+myCor)),0.5)
        myP=betai(0.5*myDF,0.5,(myDF/(myDF+pow(myPprelim,2))))

    return [myCor,myP]
Example No. 35
    def __call__(self, table, weight=None, verbose=0):
        """
        :param table: data instances.
        :type table: :class:`Orange.data.Table`
        :param weight: the weights for instances. Default: None, i.e.
            all data instances are equally important in fitting
            the regression parameters
        :type weight: None or list of Orange.feature.Continuous
            which stores weights for instances
        """
        if not self.use_vars is None:
            new_domain = Orange.data.Domain(self.use_vars,
                                            table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        # discrete values are continuized
        table = self.continuize_table(table)

        # missing values are imputed
        table = self.impute_table(table)

        if self.stepwise:
            use_vars = stepwise(table,
                                weight,
                                add_sig=self.add_sig,
                                remove_sig=self.remove_sig)
            new_domain = Orange.data.Domain(use_vars, table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        # conversion to numpy
        A, y, w = table.to_numpy()
        if A is None:
            n, m = len(table), 0
        else:
            n, m = numpy.shape(A)

        if self.intercept:
            if A is None:
                X = numpy.ones([n, 1])
            else:
                X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
        else:
            X = A

        domain = table.domain

        if numpy.std(y) < 10e-6:  # almost constant variable
            return Orange.regression.mean.MeanLearner(table)

        # set weights to the instances
        W = numpy.identity(n)
        if weight:
            for i, ins in enumerate(table):
                W[i, i] = float(ins[weight])

        compute_stats = self.compute_stats
        # adds some robustness by computing the pseudo inverse;
        # normal inverse could fail due to singularity of the X.T * W * X
        if self.ridge_lambda is None:
            cov = pinv(dot(dot(X.T, W), X))
        else:
            cov = pinv(
                dot(dot(X.T, W), X) - self.ridge_lambda * numpy.eye(m + 1))
            compute_stats = False  # TO DO: find inferential properties of the estimators
        D = dot(dot(cov, X.T), W)
        coefficients = dot(D, y)

        mu_y, sigma_y = numpy.mean(y), numpy.std(y)
        if A is not None:
            cov_x = numpy.cov(X, rowvar=0)

            # standardized coefficients
            std_coefficients = (sqrt(cov_x.diagonal()) / sigma_y) \
                                * coefficients
        else:
            std_coefficients = None

        if compute_stats is False:
            return LinearRegression(domain.class_var,
                                    domain,
                                    coefficients=coefficients,
                                    std_coefficients=std_coefficients,
                                    intercept=self.intercept)

        fitted = dot(X, coefficients)
        residuals = [ins.get_class() - fitted[i] \
                     for i, ins in enumerate(table)]

        # model summary
        # total sum of squares (total variance)
        sst = numpy.sum((y - mu_y)**2)
        # sum of squares due to regression (explained variance)
        ssr = numpy.sum((fitted - mu_y)**2)
        # error sum of squares (unexplained variance)
        sse = sst - ssr
        # coefficient of determination
        r2 = ssr / sst
        r2adj = 1 - (1 - r2) * (n - 1) / (n - m - 1)
        F = (ssr / m) / ((sst - ssr) / (n - m - 1))
        df = n - 2
        sigma_square = sse / (n - m - 1)
        # standard error of the regression estimator, t-scores and p-values
        std_error = sqrt(sigma_square * pinv(dot(X.T, X)).diagonal())
        t_scores = coefficients / std_error
        p_vals = [stats.betai(df*0.5,0.5,df/(df + t*t)) \
                  for t in t_scores]

        # dictionary of regression coefficients with standard errors
        # and p-values
        dict_model = {}
        if self.intercept:
            dict_model["Intercept"] = (coefficients[0],\
                                      std_error[0], \
                                      t_scores[0], \
                                      p_vals[0])
        for i, var in enumerate(domain.attributes):
            j = i + 1 if self.intercept else i
            dict_model[var.name] = (coefficients[j], \
                                   std_error[j],\
                                   t_scores[j],\
                                   p_vals[j])

        return LinearRegression(domain.class_var,
                                domain,
                                coefficients,
                                F,
                                std_error=std_error,
                                t_scores=t_scores,
                                p_vals=p_vals,
                                dict_model=dict_model,
                                fitted=fitted,
                                residuals=residuals,
                                m=m,
                                n=n,
                                mu_y=mu_y,
                                r2=r2,
                                r2adj=r2adj,
                                sst=sst,
                                sse=sse,
                                ssr=ssr,
                                std_coefficients=std_coefficients,
                                intercept=self.intercept)
Example No. 36
    psd, freqs, signi, sim_signi, peak_sort = lomb(noisetime, noisedata,
                                                   delta_time=dnoisedata,
                                                   signal_err=dnoisedata,
                                                   freqin=frequencies,
                                                   fap=fap, multiple=multiple)

    #peak location
    imax = psd.argmax()
    freq_max = freqs[imax]

    mpsd=max(psd)
    print ("Peak=%.2f @ %.2f Hz, significance estimate: %.1f-sigma (T-test)") % (mpsd,freq_max,signi)

    if (len(peak_sort)>0):

      psd0 = peak_sort[ long((1-fap)*(multiple-1)) ]
      print ("Expected peak %.2f for False Alarm of %.2e") % (psd0,fap)

      Prob0 = betai( 0.5*N-2.,0.5,(N-1.)/(N-1.+2.*psd0) )
      Nindep = log(1-fap)/log(1-Prob0)
      horne = long(-6.362+1.193*N+0.00098*N**2.)
      if (horne <= 0): horne=5
      print ("Estimated number of independent trials: %.2f (horne=%d)") % (Nindep,horne)

      nover = sum( peak_sort>=mpsd )
      print ("Fraction of simulations with peak greater than observed value: %d/%d") % (nover,multiple)

"""
import Gnuplot
import time
plotobj = Gnuplot.Gnuplot()
plotobj.xlabel('Period (s)')
plotobj.ylabel('LS Periodogram')
plotobj('set logscale x')
Example No. 37
                                                   multiple=multiple)

    #peak location
    imax = psd.argmax()
    freq_max = freqs[imax]

    mpsd = max(psd)
    print("Peak=%.2f @ %.2f Hz, significance estimate: %.1f-sigma (T-test)"
          ) % (mpsd, freq_max, signi)

    if (len(peak_sort) > 0):

        psd0 = peak_sort[long((1 - fap) * (multiple - 1))]
        print("Expected peak %.2f for False Alarm of %.2e") % (psd0, fap)

        Prob0 = betai(0.5 * N - 2., 0.5, (N - 1.) / (N - 1. + 2. * psd0))
        Nindep = log(1 - fap) / log(1 - Prob0)
        horne = long(-6.362 + 1.193 * N + 0.00098 * N**2.)
        if (horne <= 0): horne = 5
        print("Estimated number of independent trials: %.2f (horne=%d)") % (
            Nindep, horne)

        nover = sum(peak_sort >= mpsd)
        print(
            "Fraction of simulations with peak greater than observed value: %d/%d"
        ) % (nover, multiple)
"""
import Gnuplot
import time
plotobj = Gnuplot.Gnuplot()
plotobj.xlabel('Period (s)')
Example No. 38
    def __call__(self, table, weight=None, verbose=0):
        """
        :param table: data instances.
        :type table: :class:`Orange.data.Table`
        :param weight: the weights for instances. Default: None, i.e.
            all data instances are equally important in fitting
            the regression parameters
        :type weight: None or list of Orange.feature.Continuous
            which stores weights for instances
        """
        if self.use_vars is not None:
            new_domain = Orange.data.Domain(self.use_vars,
                                            table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        # discrete values are continuized
        table = self.continuize_table(table)

        # missing values are imputed
        table = self.impute_table(table)

        if self.stepwise:
            use_vars = stepwise(table, weight, add_sig=self.add_sig,
                                remove_sig=self.remove_sig)
            new_domain = Orange.data.Domain(use_vars, table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        domain = table.domain

        # convert to numpy
        X, y, w = table.to_numpy()
        n, m = numpy.shape(X)

        if self.intercept:
            X = numpy.insert(X, 0, 1, axis=1) # adds a column of ones

        if weight:
            weights = numpy.sqrt([float(ins[weight]) for ins in table])
            X = weights.reshape(n, 1) * X
            y = weights * y

        cov = dot(X.T, X)

        if self.ridge_lambda:
            stride = cov.shape[0] + 1
            cov.flat[self.intercept * stride::stride] += self.ridge_lambda

        # adds some robustness by computing the pseudo inverse;
        # normal inverse could fail due to the singularity of X.T * X
        invcov = pinv(cov)
        D = dot(invcov, X.T)
        coefficients = dot(D, y)

        mu_y, sigma_y = numpy.mean(y), numpy.std(y)
        if m > 0:
            # standardized coefficients
            std_coefficients = std(X, axis=0, ddof=1) / sigma_y * coefficients
        else:
            std_coefficients = None

        # TODO: find inferential properties of the estimators for ridge
        if self.compute_stats is False or self.ridge_lambda:
            return LinearRegression(domain.class_var, domain,
                coefficients=coefficients, std_coefficients=std_coefficients,
                intercept=self.intercept)

        fitted = dot(X, coefficients)
        residuals = [ins.get_class() - fitted[i]
                     for i, ins in enumerate(table)]

        # model summary
        df_reg = n - m - self.intercept
        # total sum of squares (total variance)
        sst = numpy.sum((y - mu_y) ** 2)
        # regression sum of squares (explained variance)
        ssr = numpy.sum((fitted - mu_y) ** 2)
        # residual sum of squares
        sse = numpy.sum((y - fitted) ** 2)
        # coefficient of determination
        r2 = ssr / sst
        r2 = 1 - sse / sst
        r2adj = 1 - (1 - r2) * (n - 1) / df_reg
        F = (ssr / m) / ((sst - ssr) / df_reg) if m else 0
        sigma_square = sse / df_reg
        # standard error of the regression estimator, t-scores and p-values
        std_error = sqrt(sigma_square * invcov.diagonal())
        t_scores = coefficients / std_error
        df_res = n - 2
        p_vals = [stats.betai(df_res * 0.5, 0.5, df_res / (df_res + t * t))
                  for t in t_scores]

        # dictionary of regression coefficients with standard errors
        # and p-values
        dict_model = {}
        if self.intercept:
            dict_model["Intercept"] = (coefficients[0], std_error[0],
                                       t_scores[0], p_vals[0])
        for i, var in enumerate(domain.features):
            j = i + 1 if self.intercept else i
            dict_model[var.name] = (coefficients[j], std_error[j],
                                    t_scores[j], p_vals[j])

        return LinearRegression(domain.class_var, domain, coefficients, F,
                 std_error=std_error, t_scores=t_scores, p_vals=p_vals,
                 dict_model=dict_model, fitted=fitted, residuals=residuals,
                 m=m, n=n, mu_y=mu_y, r2=r2, r2adj=r2adj, sst=sst, sse=sse,
                 ssr=ssr, std_coefficients=std_coefficients,
                 intercept=self.intercept)
Example No. 39
    def __call__(self, table, weight=None, verbose=0):
        """
        :param table: data instances.
        :type table: :class:`Orange.data.Table`
        :param weight: the weights for instances. Default: None, i.e.
            all data instances are equally important in fitting
            the regression parameters
        :type weight: None or list of Orange.feature.Continuous
            which stores weights for instances
        """       
        if not self.use_vars is None:
            new_domain = Orange.data.Domain(self.use_vars,
                                            table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        # discrete values are continuized
        table = self.continuize_table(table)
          
        # missing values are imputed
        table = self.impute_table(table)

        if self.stepwise:
            use_vars = stepwise(table, weight, add_sig=self.add_sig,
                                      remove_sig=self.remove_sig)
            new_domain = Orange.data.Domain(use_vars, table.domain.class_var)
            new_domain.addmetas(table.domain.getmetas())
            table = Orange.data.Table(new_domain, table)

        # conversion to numpy
        A, y, w = table.to_numpy()
        if A is None:
            n, m = len(table), 0
        else:
            n, m = numpy.shape(A)
     
        if self.intercept:
            if A is None:
                X = numpy.ones([n,1])
            else:
                X = numpy.insert(A, 0, 1, axis=1) # adds a column of ones
        else:
            X = A
             
        domain = table.domain
        
        if numpy.std(y) < 10e-6: # almost constant variable
            return Orange.regression.mean.MeanLearner(table)
     
        # set weights to the instances
        W = numpy.identity(n)
        if weight:
            for i, ins in enumerate(table):
                W[i, i] = float(ins[weight])

        compute_stats = self.compute_stats
        # adds some robustness by computing the pseudo inverse;
        # normal inverse could fail due to singularity of the X.T * W * X
        if self.ridge_lambda is None:
            cov = pinv(dot(dot(X.T, W), X))
        else:
            cov = pinv(dot(dot(X.T, W), X) - self.ridge_lambda*numpy.eye(m+1))
            compute_stats = False # TO DO: find inferential properties of the estimators
        D = dot(dot(cov, X.T), W)
        coefficients = dot(D, y)

        mu_y, sigma_y = numpy.mean(y), numpy.std(y)
        if A is not None:
            cov_x = numpy.cov(X, rowvar=0)

            # standardized coefficients
            std_coefficients = (sqrt(cov_x.diagonal()) / sigma_y) \
                                * coefficients
        else:
            std_coefficients = None

        if compute_stats is False:
            return LinearRegression(domain.class_var, domain, coefficients=coefficients,
                                    std_coefficients=std_coefficients, intercept=self.intercept)
            

        fitted = dot(X, coefficients)
        residuals = [ins.get_class() - fitted[i] \
                     for i, ins in enumerate(table)]

        # model summary        
        # total sum of squares (total variance)
        sst = numpy.sum((y - mu_y) ** 2)
        # sum of squares due to regression (explained variance)
        ssr = numpy.sum((fitted - mu_y)**2)
        # error sum of squares (unexplained variance)
        sse = sst - ssr
        # coefficient of determination
        r2 = ssr / sst
        r2adj = 1-(1-r2)*(n-1)/(n-m-1)
        F = (ssr/m)/((sst-ssr)/(n-m-1))
        df = n-2 
        sigma_square = sse/(n-m-1)
        # standard error of the regression estimator, t-scores and p-values
        std_error = sqrt(sigma_square*pinv(dot(X.T, X)).diagonal())
        t_scores = coefficients/std_error
        p_vals = [stats.betai(df*0.5,0.5,df/(df + t*t)) \
                  for t in t_scores]

        # dictionary of regression coefficients with standard errors
        # and p-values
        dict_model = {}
        if self.intercept:
            dict_model["Intercept"] = (coefficients[0],\
                                      std_error[0], \
                                      t_scores[0], \
                                      p_vals[0])
        for i, var in enumerate(domain.attributes):
            j = i + 1 if self.intercept else i
            dict_model[var.name] = (coefficients[j], \
                                   std_error[j],\
                                   t_scores[j],\
                                   p_vals[j])
        
        return LinearRegression(domain.class_var, domain, coefficients, F,
                 std_error=std_error, t_scores=t_scores, p_vals=p_vals, dict_model=dict_model,
                 fitted=fitted, residuals=residuals, m=m, n=n, mu_y=mu_y,
                 r2=r2, r2adj=r2adj, sst=sst, sse=sse, ssr=ssr,
                 std_coefficients=std_coefficients, intercept=self.intercept)