Example #1
 def different_stdev_explicite(self, alpha, y1, y2, S1, S2, n1, n2):
     t0 = (y1 - y2) / (np.sqrt(S1**2 / n1 + S2**2 / n2))
     # hypothesis testing2
     df = int((S1**2 / n1 + S2**2 / n2)**2 / ((S1**2 / n1)**2 / (n1 - 1) +
                                              (S2**2 / n2)**2 / (n2 - 1)))
     H1a = t.ppf(1 - alpha / 2., df) < np.abs(t0)
     H1b = t.ppf(1 - alpha, df) < t0
     H1c = t.ppf(alpha, df) > t0
     # p-value
     p1a = t.sf(np.abs(t0), df) * 2
     p1b = t.sf(t0, df)
     p1c = t.cdf(t0, df)
     c1 = y1 - y2 - t.ppf(1 - alpha / 2.,
                          df) * np.sqrt(S1**2 / n1 + S2**2 / n2)
     c2 = y1 - y2 + t.ppf(1 - alpha / 2.,
                          df) * np.sqrt(S1**2 / n1 + S2**2 / n2)
     CI = np.array([c1, c2])
     print('at the level of significance', alpha, ':')
     print('H1 mu1 != mu2 is', H1a)
     print('H1 mu1 > mu2 is', H1b)
     print('H1 mu1 < mu2 is', H1c)
     print('probability of type I error for mu1 != mu2:', p1a)
     print('probability of type I error for mu1 > mu2:', p1b)
     print('probability of type I error for mu1 < mu2:', p1c)
     print('CI (%.1f%%) for mu1 - mu2:' % (100 - 100 * alpha), CI, CI / y1)
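A quick sanity check of this Welch-style statistic against scipy (a sketch; the summary numbers are made up, scipy.stats.ttest_ind_from_stats is the real library routine):

import numpy as np
from scipy.stats import ttest_ind_from_stats

# Assumed summary statistics, for illustration only.
y1, y2, S1, S2, n1, n2 = 10.2, 9.6, 1.1, 1.4, 30, 25
t0 = (y1 - y2) / np.sqrt(S1**2 / n1 + S2**2 / n2)
t_ref, p_ref = ttest_ind_from_stats(y1, S1, n1, y2, S2, n2, equal_var=False)
print(t0, t_ref, p_ref)  # t0 should match scipy's Welch statistic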
Example #2
    def ttest_1samp(self, a, popmean):
        if (len(a) == 0):
            return [None, None]
        if (len(a) == 1):
            return [None, None]

        # calculate the sample mean
        avg = 0.0
        for x in a:
            avg += x
        avg = avg / len(a)

        S = 0.0
        for x in a:
            S += (x - avg)**2
        S = (S / (len(a) - 1))**0.5
        print(S)
        if (S == 0):
            return [None, None]
        tvalue = (avg - popmean) / (S / (len(a)**0.5))
        if (tvalue >= 0):
            p = t.sf(x=tvalue, df=len(a) - 1) * 2
            return [tvalue, p]
        else:
            p = 2 * t.sf(x=-tvalue, df=len(a) - 1)
            return [tvalue, p]
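For reference, scipy ships the same test; a minimal check of the snippet above (toy data, assuming the method is available on an instance):

import numpy as np
from scipy import stats

a = [5.1, 4.9, 5.4, 5.0, 5.3, 4.8]
t_ref, p_ref = stats.ttest_1samp(a, popmean=5.0)
print(t_ref, p_ref)  # should agree with the [tvalue, p] computed above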
Example #3
def p_value_t(x_bar, mu, s, n, how):
    """Compute the p-value when sigma is unknown.

    Hypothesis test on a population mean, computing the p-value from the
    t distribution because sigma is unknown.

    Params
    ------
    x_bar: sample mean
    mu: population mean under H0 (the target value)
    s: sample standard deviation
    n: sample size
    how: type of test, one of ( 'up', 'down', 'double' )

    Return
    ------
    (test statistic, p-value)

    """
    t_dist = t(n - 1)
    t_val = (x_bar - mu) / (s / np.sqrt(n))
    if how == 'up':
        p = t_dist.sf(t_val)
    elif how == 'down':
        p = t_dist.cdf(t_val)
    elif how == 'double':
        p = t_dist.sf(abs(t_val)) * 2
    else:
        raise ValueError("how must be one of 'up', 'down', 'double'")

    return t_val, p
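A usage sketch (toy numbers; assumes numpy as np and scipy.stats.t are imported as in the snippet):

# Upper-tailed test of H0: mu = 100 against H1: mu > 100.
t_val, p = p_value_t(x_bar=103.2, mu=100, s=8.5, n=40, how='up')
print(t_val, p)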
Example #4
    def ttest_1samp(self, a, popmean):
        if len(a) == 0:
            return [None, None]
        if len(a) == 1:
            return [None, None]

        # calculate the sample mean
        avg = 0.0
        for x in a:
            avg += x
        avg = avg / len(a)

        # sample standard deviation
        S = 0.0
        for x in a:
            S += (x - avg)**2
        S = (S / (len(a) - 1))**0.5
        print(S)
        if S == 0:
            return [None, None]
        tvalue = (avg - popmean) / (S / (len(a)**0.5))
        if tvalue >= 0:
            p = t.sf(x=tvalue, df=len(a) - 1) * 2
            return [tvalue, p]
        else:
            p = 2 * t.sf(x=-tvalue, df=len(a) - 1)
            return [tvalue, p]
Example #5
    def regression_analysis(self, key, info):
        '''
        Calculates all the values we will need for simple linear regression 
        analysis, and does the analysis itself.
        '''
        # not the most efficient, but we want to keep these values
        # to calculate standard errors
        info = list(info)

        # calculate sums
        sumx, sumy, sumxx, sumyy, sumxy, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0)
        for (x, y) in info:
            sumx += x
            sumy += y
            sumxx += x * x
            sumyy += y * y
            sumxy += x * y
            n += 1

        # calculate correlation
        corr = 0
        corr_denom = math.sqrt((n * sumxx - sumx**2) * (n * sumyy - sumy**2))
        if corr_denom < 0.0001:
            yield False, "Could not calculate coefficients"
            return

        corr_num = n * sumxy - sumx * sumy 
        corr = corr_num / corr_denom

        if abs(corr) < 0.0001:
            yield False, "Could not calculate coefficients"
            return

        # calculate regression coefficients
        beta1 = (sumxy - sumx * sumy / n) / (sumxx - sumx**2 / n)
        beta0 = (sumy - beta1 * sumx) / n

        # calculate standard errors
        x_reals = [x for (x, y) in info]
        y_reals = [y for (x, y) in info]
        y_hats = [beta0 + beta1 * x for x in x_reals]
        s_num = sum((y - yhat)**2 for (y, yhat) in zip(y_reals, y_hats))
        s = math.sqrt(s_num / (n - 2))

        se_denom = n * sumxx - sumx**2
        se_beta0 = s * math.sqrt(sumxx / se_denom)
        se_beta1 = s * math.sqrt(n / se_denom)

        # calculate t-values
        t0 = beta0 / se_beta0
        t1 = beta1 / se_beta1

        # calculate 2-sided p-values
        alpha = 0.05
        t_stat = t.ppf(1 - alpha/2, n - 2)
        beta0_p_value = t.sf(abs(t0), n - 2) * 2
        beta1_p_value = t.sf(abs(t1), n - 2) * 2

        # output most important values in a human-readable format
        print("Correlation: {}".format(corr))
        print("Beta 0: {}, p-value: {}".format(beta0, beta0_p_value))
        print("Beta 1: {}, p-value: {}".format(beta1, beta1_p_value))
Example #6
def _correl_pvalue(r, n, k=0, alternative="two-sided"):
    """Compute the p-value of a correlation coefficient.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html
    https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Using_the_exact_distribution

    See also scipy.stats._ttest_finish

    Parameters
    ----------
    r : float
        Correlation coefficient.
    n : int
        Sample size
    k : int
        Number of covariates for (semi)-partial correlation.
    alternative : string
        Tail of the test.

    Returns
    -------
    pval : float
        p-value.

    Notes
    -----
    This uses the same approach as :py:func:`scipy.stats.pearsonr` to calculate
    the p-value (i.e. using a beta distribution)
    """
    from scipy.stats import t
    assert alternative in [
        'two-sided', 'greater', 'less'
    ], ("Alternative must be one of 'two-sided' (default), 'greater' or 'less'."
        )

    # Method 1: using a student T distribution
    dof = n - k - 2
    tval = r * np.sqrt(dof / (1 - r**2))
    if alternative == 'less':
        pval = t.cdf(tval, dof)
    elif alternative == 'greater':
        pval = t.sf(tval, dof)
    elif alternative == 'two-sided':
        pval = 2 * t.sf(np.abs(tval), dof)

    # Method 2: beta distribution (similar to scipy.stats.pearsonr, faster)
    # from scipy.special import btdtr
    # ab = (n - k) / 2 - 1
    # pval = 2 * btdtr(ab, ab, 0.5 * (1 - abs(np.float64(r))))
    return pval
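The t-based p-value agrees with scipy.stats.pearsonr in the plain (k=0) case; a quick check on random toy data:

import numpy as np
from scipy.stats import pearsonr

rng = np.random.default_rng(0)
x, y = rng.normal(size=50), rng.normal(size=50)
r, p_ref = pearsonr(x, y)
print(p_ref, _correl_pvalue(r, n=50, k=0, alternative="two-sided"))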
Example #7
def kramers_v(x, y, bias_correction=True):
    """Calculates Cramer's V statistic for categorical-categorical association.

    Taken from https://github.com/shakedzy/dython/blob/master/dython/nominal.py
    Inspired by Shaked Zychlinski.

    This is a symmetric coefficient: V(x,y) = V(y,x)
    Original function taken from: https://stackoverflow.com/a/46498792/5863503
    Wikipedia: https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V

    Parameters:
    -----------
    x : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    y : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    bias_correction : Boolean, default = True
        Use bias correction from Bergsma and Wicher,
        Journal of the Korean Statistical Society 42 (2013): 323-328.

    Returns:
    --------
    (V, p_value) : V is a float in the range [0, 1]; p_value comes from
        the underlying chi-squared test of independence.
    """
    confusion_matrix = crosstab(x, y)
    c2, p_value = chi2_contingency(confusion_matrix)[:2]
    n = confusion_matrix.sum().sum()
    phi2 = c2 / n
    r, k = confusion_matrix.shape
    if bias_correction:
        phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
        rcorr = r - ((r - 1) ** 2) / (n - 1)
        kcorr = k - ((k - 1) ** 2) / (n - 1)
        if min((kcorr - 1), (rcorr - 1)) == 0:
            warnings.warn(
                "Unable to calculate Cramer's V using bias correction. Consider using bias_correction=False",
                RuntimeWarning)
            return np.nan
        else:
            V = np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
            # report the chi-squared test p-value; the original t-based
            # expression evaluated to a constant independent of the data
            return V, p_value
    else:
        V = np.sqrt(phi2 / min(k - 1, r - 1))
        return V, p_value
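A usage sketch on toy categorical data (assumes crosstab above is pandas.crosstab and scipy.stats.chi2_contingency is imported as in the snippet):

import pandas as pd

x = pd.Series(["a", "a", "b", "b", "a", "b", "a", "b"])
y = pd.Series(["u", "u", "v", "v", "u", "v", "v", "u"])
V, p = kramers_v(x, y, bias_correction=False)
print(V, p)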
Example #8
 def pearsonr(self, x, y):
     n = len(x)
     if(n == 0):
         return [None, None]
     sum_x1 = 0
     sum_x2 = 0
     for i in x:
         sum_x1 += float(i)
         sum_x2 += float(i)**2
     sum_y1 = 0
     sum_y2 = 0
     for i in y:
         sum_y1 += float(i)
         sum_y2 += float(i)**2
     f1 = 0
     for i in range(n):
         f1 += float(x[i])*float(y[i])
     f1 = f1 * n
     f1 = f1 - sum_x1 * sum_y1
     f21 = (n * sum_x2-sum_x1**2)**0.5
     f22 = (n * sum_y2-sum_y1**2)**0.5
     f2 = f21 * f22
     r = f1 / f2
     r = round(r, 6)
     if(r == 1 or r == -1):
         p = 0
     else:
         T = r * ((n-2)/(1-r**2))**0.5
         p = t.sf(abs(T), (n-2)) * 2
         p = round(p, 6)
         print(p)
     return [r, p]
Example #9
def dunnetts_post_hoc(X0, X, alpha):
    Y = [X0, *X]
    p = len(X)
    N_i = [len(y) for y in Y]
    # s^2 = Sum(Sum((X_ij - |X|)^2))/n
    #n = sum(N_i) - (p+1)
    n = np.sum(N_i) - (p + 1)  # degrees of freedom
    s_num = np.sum([np.sum((np.asarray(x) - np.mean(x))**2) for x in Y])
    s = np.sqrt(s_num / n)

    N = [len(x) for x in X]
    m0 = np.mean(X0)
    N0 = len(X0)
    t_cv = t.ppf(1 - (alpha / 2),
                 n)  # get the 2-tailed critical value from the t-distribution
    CI = []
    P = []
    for x, Ni in zip(X, N):
        mx = np.mean(x)
        A0 = t_cv * s * np.sqrt(1 / Ni + 1 / N0)
        Ai = np.abs(mx - m0)
        Ti = Ai / (s * np.sqrt(1 / Ni + 1 / N0))
        Pi = t.sf(Ti, n)
        P.append(Pi)
        CI.append((Ai - A0, Ai + A0))

    return CI, P
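A usage sketch comparing a control group against two treatments (toy data; numpy and scipy.stats.t assumed imported as in the snippet):

X0 = [10.1, 9.8, 10.3, 10.0]           # control
X = [[11.0, 11.2, 10.9, 11.4],         # treatment 1
     [9.9, 10.2, 10.0, 9.7]]           # treatment 2
CI, P = dunnetts_post_hoc(X0, X, alpha=0.05)
print(CI)  # confidence intervals on |mean_i - mean_0|
print(P)   # one-sided p-values per comparison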
Example #10
def main(feature_set):
    coef_list = []
    for iteration in range(MAX_ITERATIONS):
        print('iteration: %d\r' % (iteration + 1), end='')
        x_train, x_test, y_train, y_test = get_regression_dataset(
            0.6, feature_set=feature_set)
        # x_train, x_test = x_train[feature_set], x_test[feature_set]
        lr = LinearRegression()
        lr.fit(x_train, y_train)
        coef_list.append(lr.coef_)

    coef_list = np.array(coef_list)
    se = np.std(coef_list, 0) / np.sqrt(MAX_ITERATIONS)
    t = np.mean(coef_list, 0) / se
    pvalue = t_table.sf(np.fabs(t), len(t) - 1) * 2
    coef_list = np.mean(coef_list, 0)

    print('\n\n{:25s}   {:s}         {:s}  {:s}     {:s}'.format(
        'Field', 'COEF', 'Standard Error', 't-Statistics', 'P-value'))
    print('================================================================================')
    for values in zip(feature_set, coef_list, se, t, pvalue):
        print('{:25s}   {:3.4f} \t    {:3.4f} \t    {:3.4f} \t  {:3.6f}'.format(
            *values))
    print('\n')
    print_errors(lr,
                 x_train,
                 y_train.values,
                 x_test,
                 y_test.values,
                 msg='Full Features')
Example #11
def t_equal_var(n1, m1, var1, n2, m2, var2):
    temp = ((n1 - 1) * var1 +
            (n2 - 1) * var2) / (n1 + n2 - 2) * (1 / n1 + 1 / n2)
    _t = (m1 - m2) / np.sqrt(temp)
    _v = n1 + n2 - 2
    _p = t.sf(_t, _v)
    return _t, _v, _p
Example #12
def compute_corrected_ttest(differences, df, n_train, n_test):
    """Computes right-tailed paired t-test with corrected variance.

    Parameters
    ----------
    differences : array-like of shape (n_samples, 1)
        Vector containing the differences in the score metrics of two models.
    df : int
        Degrees of freedom.
    n_train : int
        Number of samples in the training set.
    n_test : int
        Number of samples in the testing set.

    Returns
    -------
    t_stat : float
        Variance-corrected t-statistic.
    p_val : float
        Variance-corrected p-value.
    """
    mean = np.mean(differences)
    std = corrected_std(differences, n_train, n_test)
    t_stat = mean / std
    p_val = t.sf(np.abs(t_stat), df)  # right-tailed t-test
    return t_stat, p_val
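corrected_std is not defined in this snippet; in the scikit-learn documentation example this docstring comes from, it applies the Nadeau-Bengio variance correction for cross-validated score differences. A plausible sketch (an assumption about its body, not verbatim library code):

import numpy as np

def corrected_std(differences, n_train, n_test):
    # Inflate the naive variance of the k score differences by
    # (1/k + n_test/n_train) to account for overlapping training sets
    # (Nadeau & Bengio correction; assumed implementation).
    kr = len(differences)
    corrected_var = np.var(differences, ddof=1) * (1 / kr + n_test / n_train)
    return np.sqrt(corrected_var)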
Example #13
def get_correlation_parallel(s1, s2):
    """
    params s1 - series 1
    params s2 - series 2 
    NOTE : series are number 1 to 25 when giving in arguments
    returns the correlation between series
    """
    start = time.time()
    offsets = []  #this will be the arguments to all the parallel jobs
    instances = MAX_ROWS // BATCH_SIZE  # Pool needs an integer count
    mean, std = calculate_mean_std_parallel()
    stripped_mean, stripped_std = calculate_stripped_mean_std_parallel(
        mean, std)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append(
            (s1, s2, mean, std, stripped_mean, stripped_std, i * BATCH_SIZE))
    results = processes.map(get_correlation, offsets)
    processes.close()
    processes.join()
    pearson_corr = 0
    total = 0
    for result in results:
        pearson_corr += result[0] * result[1]
        total += result[1]
    pearson_corr = 1.0 * pearson_corr / total
    t_value = abs(pearson_corr * math.sqrt(1.0 * (total - 2) /
                                           (1 -
                                            (pearson_corr * pearson_corr))))
    p_value = t.sf(t_value, total - 2)
    print("\n ######### CORRELATION BETWEEN SERIES", s1, "AND SERIES", s2,
          "is", pearson_corr, "t value is", t_value, "and p value is",
          p_value, "######### \n")
    end = time.time()
    print("EXECUTION TIME :", end - start, "sec")
    return pearson_corr
Example #14
 def wrapper(*args, **kwargs) -> Tuple[float, float]:
     sample_dist = func(*args, **kwargs)
     estimate = sample_dist.mean()
     std_err_estimate = sample_dist.std()
     n_samples = len(sample_dist)
     return estimate, 2 * t.sf(
         x=abs(estimate), df=n_samples - 2, loc=0, scale=std_err_estimate)
Example #15
 def ttest_1samp(self, a, popmean):
     n = len(a)
     mean = self.mean(a)
     
     t = (mean-popmean)/(self.stan_de(a, mean)/(n**0.5))
     p = 2*T.sf(abs(t),n-1)
     return [round(t,6),round(p,6)]
Example #16
    def pearsonr(self, x, y):
        n = len(x)
        if (n == 0):
            return [None, None]

        sum_x = sum(x)
        sum_y = sum(y)

        sum_xy = 0.0
        sum_x2 = 0.0
        sum_y2 = 0.0
        for xi, yi in zip(x, y):
            sum_xy += xi * yi
            sum_x2 += xi**2
            sum_y2 += yi**2

        z = ((n * sum_x2 - (sum_x)**2) * (n * sum_y2 - (sum_y)**2))**0.5
        if (z == 0):
            return [None, 0]

        r = (n * sum_xy - sum_x * sum_y) / z
        if (abs(r) == 1):
            return [r, 0]

        tvalue = r * ((n - 2) / (1 - r**2))**0.5
        p = 2 * t.sf(x=abs(tvalue), df=n - 2)
        return (round(r, 6), round(p, 6))
Example #17
    def get_local_air_quality_comparison(self, city_str, tolerance=2.0):
        self.city_str = city_str
        token = "fe269bc83b983ff958090f5808afa12eed57f14f"
        req_data = get_request_data(self.base_url + self.city_str +
                                    "/?token=" + token)

        lat, lng = req_data['data']['city']['geo']

        latlngbx = str(lat) + "," + str(lng) + "," + str(
            lat + tolerance) + "," + str(lng + tolerance)
        r = requests.get(
            "https://api.waqi.info/" +
            f"/map/bounds/?latlng={latlngbx}&token={token}").json()
        if len(r['data']) > 0:
            local_df = make_dataframe(r)
            air_quality_comp = {
                'deviation': 'Not found',
                'probability': 'Not found'
            }

            deviation = local_df[local_df['name'].str.contains(
                city_str)]['aqi'].mean() - local_df['aqi'].mean()

            if not np.isnan(deviation):
                air_quality_comp['deviation'] = deviation

            probability = one_samp_t_test(
                local_df[local_df['name'].str.contains(city_str)], deviation)
            probability = t.sf(np.abs(probability), local_df.count() - 1)[0]

            if not np.isnan(probability):
                air_quality_comp['probability'] = probability

            return air_quality_comp
Example #18
 def calculate_t_p_error_stats(self):
     self.rating_dict = {.05:"*",
                    .01:"**",
                    .001: "***"}
     results = self.estimates
     stat_sig_names = ["SE", "t-stat", "p-value"]
     for stat_name in stat_sig_names: 
         results[stat_name] = np.nan
     #generate statistic for each variable
     for var in self.beta_names:
         #SE of coefficient is found in the diagonal of cov_matrix
         results.loc[var, "SE"] = self.cov_matrix[var][var] ** (1/2)
         #tstat = Coeff / SE
         results.loc[var, "t-stat"] = \
             results["Coefficient"][var] / results["SE"][var]
         #p-value is estimated using a table that transforms the t-value
         #in reference to df
         results.loc[var, "p-value"] = np.round(t.sf(np.abs(
             results.loc[var, "t-stat"]), self.degrees_of_freedom + 1) * 2, 5)
     #values for significance will be blank unless p-value < .05
     #pandas does not allow np.nan values or default blank strings to be replaced
     significance = ["" for i in range(len(self.beta_names))]   
     for i in range(len(self.beta_names)):
         var = self.beta_names[i]
         for val in self.rating_dict:
             if results.loc[var]["p-value"] < val:
                 significance[i] = self.rating_dict[val]
                 print(var, self.rating_dict[val])  
     results["significance"] = significance
Example #19
 def calculate_t_p_error_stats(self):
     est = ["SE", "t-stat", "p-value", "p-rating"]
     rating_dict = {.001:"***",
                    .01:"**",
                    .05:"*"}
     for name in est: 
         results = self.estimates
         results[name] = np.nan
         for var in self.beta_names:
             if name == "SE":
                 # SE of coefficient is found in the diagonal of cov_matrix
                 results.loc[var, name] = \
                     self.cov_matrix[var][var] ** (1/2)
             if name == "t-stat":
                 # tstat = Coef / SE
                 results.loc[var, name] = \
                     results.loc[var, "Coefficient"] / results.loc[var, "SE"]
             if name == "p-value":
                 # the p-value is estimated from the location within a
                 # distribution implied by the t-stat
                 results.loc[var, name] = round(t.sf(
                     np.abs(results.loc[var, "t-stat"]),
                     self.degrees_of_freedom + 1) * 2, 5)
             if name == "p-rating":
                 print(name)
                 for val in rating_dict:
                     if results.loc[var, "p-value"] < val:
                         results.loc[var, name] = rating_dict[val]
                         break
                     # if p-value > .05, no break in the for-loop: set ""
                     results.loc[var, name] = ""
Example #20
    def _p_value_raw(self):
        """Returns the raw p values."""
        from scipy.stats import t

        result = [2 * t.sf(a, b) for a, b in zip(np.fabs(self._t_stat_raw), self._df_resid_raw)]

        return np.array(result)
Example #21
    def pearsonr(self, x, y):
        n = len(x)
        if (n == 0):
            return [None, None]
        else:
            sumX = 0
            sumY = 0
            sumX2 = 0
            sumY2 = 0

            sumX = self.getSum(x)
            sumX2 = self.getSum2(x)
            sumY = self.getSum(y)
            sumY2 = self.getSum2(y)

            xy = 0
            for i in range(n):
                xy += float(x[i]) * float(y[i])
            f1 = n * xy - sumX * sumY
            f21 = (n * sumX2 - sumX**2)**0.5
            f22 = (n * sumY2 - sumY**2)**0.5
            f2 = f21 * f22
            if (f2 == 0):
                return [None, None]
            r = f1 / f2
            r = round(r, 6)

            if (r == 1 or r == -1):
                p = 0
            else:
                T = r * ((n - 2) / (1 - r**2))**0.5
                p = t.sf(abs(T), n - 2) * 2
                p = round(p, 6)

            return [r, p]
Example #22
 def pearsonr(self, x, y):
     sx = 0.0
     sy = 0.0
     sxy = 0.0
     sxx = 0.0
     syy = 0.0
     if len(x) == 0 or len(y) == 0:
         return [None, None]
     if len(x) != len(y):
         return [None, None]
     n = len(x)
     for i in range(0, n):
         sx += x[i]
         sy += y[i]
         sxy += x[i] * y[i]
         sxx += x[i]**2
         syy += y[i]**2
     rxy = (n * sxy - sx * sy) / ((n * sxx - sx**2) *
                                  (n * syy - sy**2))**0.5
     v = (1 - rxy**2)
     if v == 0:
         return [round(rxy, 6), 0.000000]
     t = rxy * (((n - 2) / (1 - rxy**2))**0.5)
     p = T.sf(t, n - 2)
     if p > 0.5:
         p = 2 * (1 - p)  # two-sided: double the smaller tail
     else:
         p = 2 * p
     return [round(rxy, 6), round(p, 6)]
Example #23
 def pearsonr(self, x, y):
     n = len(x)
     if n == 0:
         return [None, None]

     sum_x = sum(x)
     sum_y = sum(y)

     sum_xy = 0.0
     sum_x2 = 0.0
     sum_y2 = 0.0
     for xi, yi in zip(x, y):
         sum_xy += xi * yi
         sum_x2 += xi**2
         sum_y2 += yi**2

     z = ((n * sum_x2 - sum_x**2) * (n * sum_y2 - sum_y**2))**0.5
     if z == 0:
         return [None, 0]

     r = (n * sum_xy - sum_x * sum_y) / z
     if abs(r) == 1:
         return [r, 0]

     tvalue = r * ((n - 2) / (1 - r**2))**0.5
     p = 2 * t.sf(x=abs(tvalue), df=n - 2)
     return (round(r, 6), round(p, 6))
Example #24
 def calculate_t_p_error_stats(self):
     results = self.estimates
     stat_sig_names = ["SE", "t-stat", "p-value"]
     # create space in data frame for SE, t, and p
     for stat_name in stat_sig_names:
         results[stat_name] = np.nan
     # generate statistic for each variable
     for var in self.beta_names:
         # the squared SE of each coefficient sits on the diagonal of
         # the cov_matrix
         results.loc[var, "SE"] = self.cov_matrix[var][var] ** (1/2)
         # t-stat = Coef / SE
         results.loc[var, "t-stat"] = \
             results["Coefficient"][var] / results["SE"][var]
         # the p-value is estimated using a table that transforms the
         # t-stat in light of the degrees of freedom;
         # 2 is for a 2-tailed test, 5 rounds to 5 decimal places
         results.loc[var, "p-value"] = np.round(
             t.sf(np.abs(results.loc[var, "t-stat"]),
                  self.degrees_of_freedom + 1) * 2, 5)
     ratings = [.05, .01, .001]
     significance = ["" for name in self.beta_names]
     for i in range(len(self.beta_names)):
         var = self.beta_names[i]
         for rating in ratings:
             if results.loc[var]["p-value"] < rating:
                 significance[i] = significance[i] + "*"
     results["significance"] = significance
Example #25
 def different_stdev(self, alpha):
     t0 = (self.y1 - self.y2) / (np.sqrt(self.S1**2/self.n1 +
                                         self.S2**2/self.n2))
     # hypothesis testing2
     n1, n2, y1, y2, S1, S2 = self.n1, self.n2, self.y1, self.y2, self.S1, self.S2
     df = int((S1**2/n1+S2**2/n2)**2/((S1**2/n1)**2/(n1-1)+(S2**2/n2)**2/(n2-1)))
     H1a = t.ppf(1 - alpha/2., df) < np.abs(t0)
     H1b = t.ppf(1 - alpha, df) < t0
     H1c = t.ppf(alpha, df) > t0
     # p-value
     p1a = t.sf(np.abs(t0), df) * 2
     p1b = t.sf(t0, df)
     p1c = t.cdf(t0, df)
     c1 = y1 - y2 - t.ppf(1 - alpha/2., df) * np.sqrt(S1**2/n1+S2**2/n2)        
     c2 = y1 - y2 + t.ppf(1 - alpha/2., df) * np.sqrt(S1**2/n1+S2**2/n2)
     return H1a, H1b, H1c, p1a, p1b, p1c, (c1,c2)
Example #26
    def calculate_t_p_error_stats(self):
        ratings = [.05, .01, .001]
        results = self.estimates
        stat_sig_names = ["SE", "t-stat", "p-value"]
        # create space in data frame for SE, t, and p
        for stat_name in stat_sig_names:
            results[stat_name] = np.nan
        # generate statistic for each variable
        for var in self.beta_names:
            # the squared SE of each coefficient sits on the diagonal
            # of cov_matrix
            results.loc[var, "SE"] = self.cov_matrix[var][var]**(1 / 2)

            # t-stat = Coef / SE
            results.loc[var, "t-stat"] = \
                results["Coefficient"][var] / results["SE"][var]
            # the p-value is estimated using a table that transforms the
            # t-value in light of the degrees of freedom
            results.loc[var, "p-value"] = np.round(
                t.sf(np.abs(results.loc[var, "t-stat"]),
                     self.degrees_of_freedom + 1) * 2, 5)
        # values for significances will be blank unless p-values < .05
        # pandas does not allow np.nan values or default blank strings to
        # be replaced x-post
        significance = ["" for i in range(len(self.beta_names))]
        for i in range(len(self.beta_names)):
            var = self.beta_names[i]
            for val in ratings:
                if results.loc[var]["p-value"] < val:
                    significance[i] = significance[i] + "*"
        results["significance"] = significance
Example #27
def get_correlation_parallel(s1,s2):
    """
    params s1 - series 1
    params s2 - series 2 
    NOTE : series are number 1 to 25 when giving in arguments
    returns the correlation between series
    """
    start = time.time()
    offsets = [] #this will be the arguments to all the parallel jobs
    instances = MAX_ROWS // BATCH_SIZE  # Pool needs an integer count
    mean,std = calculate_mean_std_parallel()
    stripped_mean,stripped_std = calculate_stripped_mean_std_parallel(mean,std)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((s1,s2,mean,std,stripped_mean,stripped_std,i*BATCH_SIZE))
    results = processes.map(get_correlation,offsets)
    processes.close()
    processes.join()
    pearson_corr = 0
    total = 0
    for result in results:
        pearson_corr += result[0]*result[1]
        total += result[1]
    pearson_corr = 1.0*pearson_corr / total
    t_value = abs(pearson_corr*math.sqrt( 1.0*(total - 2) / ( 1 - (pearson_corr*pearson_corr))))
    p_value = t.sf(t_value,total-2)
    print("\n ######### CORRELATION BETWEEN SERIES", s1, "AND SERIES", s2,
          "is", pearson_corr, "t value is", t_value, "and p value is",
          p_value, "######### \n")
    end = time.time()
    print("EXECUTION TIME :", end - start, "sec")
    return pearson_corr
Example #28
def lag_linregress_3D(x, y, lagx=0, lagy=0):
    """
    Input: Two xr.Datarrays of any dimensions with the first dim being time. 
    Thus the input data could be a 1D time series, or for example, have three 
    dimensions (time,lat,lon). 
    Datasets can be provided in any order, but note that the regression slope 
    and intercept will be calculated for y with respect to x.
    Output: Covariance, correlation, regression slope and intercept, p-value, 
    and standard error on regression between the two datasets along their 
    aligned time dimension.  
    Lag values can be assigned to either of the data, with lagx shifting x, and
    lagy shifting y, with the specified lag amount. 
    """
    #1. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #2. Add lag information if any, and shift the data accordingly
    if lagx != 0:

        # If x lags y by 1, x must be shifted 1 step backwards.
        # But as the 'zero-th' value is nonexistent, xr assigns it as invalid
        # (nan). Hence it needs to be dropped.
        x = x.shift(time=-lagx).dropna(dim='time')

        # Next important step is to re-align the two datasets so that y adjusts
        # to the changed coordinates of x
        x, y = xr.align(x, y)

    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    #3. Compute data length, mean and standard deviation along time axis:
    n = y.notnull().sum(dim='time')
    xmean = x.mean(axis=0)
    ymean = y.mean(axis=0)
    xstd = x.std(axis=0)
    ystd = y.std(axis=0)

    #4. Compute covariance along time axis
    cov = np.sum((x - xmean) * (y - ymean), axis=0) / (n)

    #5. Compute correlation along time axis
    cor = cov / (xstd * ystd)

    #6. Compute regression slope and intercept:
    slope = cov / (xstd**2)
    intercept = ymean - xmean * slope

    #7. Compute P-value and standard error
    #Compute t-statistics
    tstats = cor * np.sqrt(n - 2) / np.sqrt(1 - cor**2)
    stderr = slope / tstats

    from scipy.stats import t
    pval = t.sf(np.abs(tstats), n - 2) * 2  # two-sided, valid for negative cor
    pval = xr.DataArray(pval, dims=cor.dims, coords=cor.coords)

    return cov, cor, slope, intercept, pval, stderr
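A usage sketch on small synthetic (time, lat, lon) arrays (toy data; xarray, numpy, and the function above assumed available):

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
x = xr.DataArray(rng.random((20, 3, 4)), dims=("time", "lat", "lon"),
                 coords={"time": np.arange(20)})
y = 0.5 * x + 0.1 * rng.random((20, 3, 4))
cov, cor, slope, intercept, pval, stderr = lag_linregress_3D(x, y)
print(slope.shape, pval.shape)  # (3, 4) maps over lat/lon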
Example #29
def bicor(x, y, c=9):
    """
    Biweight midcorrelation.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be independent.
    c : float
        Tuning constant for the biweight estimator (default = 9.0).

    Returns
    -------
    r : float
        Correlation coefficient.
    pval : float
        Two-tailed p-value.

    Notes
    -----
    This function will return (np.nan, np.nan) if mad(x) == 0 or mad(y) == 0.

    References
    ----------
    https://en.wikipedia.org/wiki/Biweight_midcorrelation

    https://docs.astropy.org/en/stable/api/astropy.stats.biweight.biweight_midcovariance.html

    Langfelder, P., & Horvath, S. (2012). Fast R Functions for Robust
    Correlations and Hierarchical Clustering. Journal of Statistical Software,
    46(11). https://www.ncbi.nlm.nih.gov/pubmed/23050260
    """
    from scipy.stats import t
    # Calculate median
    nx = x.size
    x_median = np.median(x)
    y_median = np.median(y)
    # Raw median absolute deviation
    x_mad = np.median(np.abs(x - x_median))
    y_mad = np.median(np.abs(y - y_median))
    if x_mad == 0 or y_mad == 0:
        # From Langfelder and Horvath 2012:
        # "Strictly speaking, a call to bicor in R should return a missing
        # value if mad(x) = 0 or mad(y) = 0." This avoids division by zero.
        return np.nan, np.nan
    # Calculate weights
    u = (x - x_median) / (c * x_mad)
    v = (y - y_median) / (c * y_mad)
    w_x = (1 - u**2)**2 * ((1 - np.abs(u)) > 0)
    w_y = (1 - v**2)**2 * ((1 - np.abs(v)) > 0)
    # Normalize x and y by weights
    x_norm = (x - x_median) * w_x
    y_norm = (y - y_median) * w_y
    denom = (np.sqrt((x_norm**2).sum()) * np.sqrt((y_norm**2).sum()))
    # Calculate r, t and two-sided p-value
    r = (x_norm * y_norm).sum() / denom
    tval = r * np.sqrt((nx - 2) / (1 - r**2))
    pval = 2 * t.sf(abs(tval), nx - 2)
    return r, pval
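A usage sketch showing the robustness to a single outlier (toy data; numpy assumed as np):

import numpy as np

rng = np.random.default_rng(1)
x = rng.normal(size=100)
y = x + rng.normal(scale=0.5, size=100)
x[0] = 25.0  # extreme outlier; the biweight downweights it
print(bicor(x, y))  # (r, pval); r stays close to the bulk correlation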
Example #30
def percbend(x, y, beta=0.2):
    """
    Percentage bend correlation (Wilcox 1994).
    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be independent.
    beta : float
        Bending constant for omega (0 <= beta <= 0.5).
    Returns
    -------
    r : float
        Percentage bend correlation coefficient.
    pval : float
        Two-tailed p-value.
    Notes
    -----
    Code inspired by Matlab code from Cyril Pernet and Guillaume Rousselet.
    References
    ----------
    .. [1] Wilcox, R.R., 1994. The percentage bend correlation coefficient.
       Psychometrika 59, 601–616. https://doi.org/10.1007/BF02294395
    .. [2] Pernet CR, Wilcox R, Rousselet GA. Robust Correlation Analyses:
       False Positive and Power Validation Using a New Open Source Matlab
       Toolbox. Frontiers in Psychology. 2012;3:606.
       doi:10.3389/fpsyg.2012.00606.
    """
    X = np.column_stack((x, y))
    nx = X.shape[0]
    M = np.tile(np.median(X, axis=0), nx).reshape(X.shape)
    W = np.sort(np.abs(X - M), axis=0)
    m = int((1 - beta) * nx)
    omega = W[m - 1, :]
    P = (X - M) / omega
    P[np.isinf(P)] = 0
    P[np.isnan(P)] = 0

    # Loop over columns
    a = np.zeros((2, nx))
    for c in [0, 1]:
        psi = P[:, c]
        i1 = np.where(psi < -1)[0].size
        i2 = np.where(psi > 1)[0].size
        s = X[:, c].copy()
        s[np.where(psi < -1)[0]] = 0
        s[np.where(psi > 1)[0]] = 0
        pbos = (np.sum(s) + omega[c] * (i2 - i1)) / (s.size - i1 - i2)
        a[c] = (X[:, c] - pbos) / omega[c]

    # Bend
    a[a <= -1] = -1
    a[a >= 1] = 1

    # Get r, tval and pval
    a, b = a
    r = (a * b).sum() / np.sqrt((a ** 2).sum() * (b ** 2).sum())
    tval = r * np.sqrt((nx - 2) / (1 - r ** 2))
    pval = 2 * t.sf(abs(tval), nx - 2)
    return r, pval
Example #31
    def _p_values(self):
        """
        Return the model's coefficient/parameter p-values.

        :return: Numpy array
        """
        p_vals = [t.sf(abs(x), self.deg_of_freedom)*2 for x in self.t_statistics]
        return p_vals
Example #32
    def equal_stdev(self, alpha):
        n1, n2, y1, y2 = self.n1, self.n2, self.y1, self.y2
        Sp = np.sqrt( ((n1 - 1)*self.S1**2 +
                       (n2 - 1)*self.S2**2) / (n1 + n2 - 2) )
        t0 = (y1 - y2) / (Sp * np.sqrt(1./n1 + 1./n2))

        # hypothesis testing2
        H1a = t.ppf(1 - alpha/2., n1 + n2 -2) < np.abs(t0)
        H1b = t.ppf(1 - alpha, n1 + n2 -2) < t0
        H1c = t.ppf(alpha, n1 + n2 -2) > t0
        # p-value
        p1a = t.sf(np.abs(t0), n1 + n2 -2) * 2
        p1b = t.sf(t0, n1 + n2 -2)
        p1c = t.cdf(t0, n1 + n2 -2)
        c1 = y1 - y2 - t.ppf(1 - alpha/2., n1 + n2 -2) * Sp * np.sqrt(1./n1+1./n2)        
        c2 = y1 - y2 + t.ppf(1 - alpha/2., n1 + n2 -2) * Sp * np.sqrt(1./n1+1./n2)
        return H1a, H1b, H1c, p1a, p1b, p1c, (c1,c2)
Example #33
 def _compute_pvalue(self):
     """Returns the p-value."""
     if self.test_statistic_name == "t":
         if self.side == "less_than":
             return student_t.cdf(self.test_statistic, self.deg_of_freedom)
         elif self.side == "greater_than":
             return student_t.sf(self.test_statistic, self.deg_of_freedom)
         else:  # side is "not_equal"
             return 2 * student_t.sf(abs(self.test_statistic),
                                     self.deg_of_freedom)
     elif self.test_statistic_name == "z":
         if self.side == "less_than":
             return norm.cdf(self.test_statistic)
         elif self.side == "greater_than":
             return norm.sf(self.test_statistic)
         else:  # side is "not_equal"
             return 2 * norm.sf(abs(self.test_statistic))
Example #34
    def t_test(cat):
        p = cat['avg_x']*cat['cnt_x']+cat['avg_y']*cat['cnt_y']
        p = p/(cat['cnt_x']+cat['cnt_y'])
        p += 1e-8
        z = (cat['avg_x']-cat['avg_y']) / np.sqrt((p*(1-p)*(1/cat['cnt_x']+1/cat['cnt_y'])))
        p_value = t.sf(abs(z), df=cat['cnt_x']+cat['cnt_y']-2)*2

        return p_value
Example #35
def t_tests_on_mean(mu_0, x_var, s, n, alpha, power=None):
    print("Two-Sided t-Test - H_0 : μ = {} vs H_A : μ ≠ {}".format(mu_0, mu_0))
    print("with x_var {}, s {}, n {}, α {} :\n".format(x_var, s, n, alpha))

    t_statistic = (x_var - mu_0) / (s / sqrt(n))
    p_value = t.sf(np.abs(t_statistic), n - 1) * 2
    print("t-statistic : {:.4f}, p-value : 2P(T>=|t|) = {:.3f}".format(
        t_statistic, p_value))
    print("The null hypothesis is {}".format(
        "Accepted" if p_value > alpha else "Rejected"))
    print(
        "========================================================================"
    )

    print("One-Sided t-Test - H_0 : μ <= {} vs H_A : μ > {}".format(
        mu_0, mu_0))
    print("with x_var {}, s {}, n {}, α {} :\n".format(x_var, s, n, alpha))

    p_value = t.sf(t_statistic, n - 1)
    print("t-statistic : {:.4f}, p-value : P(T>=t) = {:.3f}".format(
        t_statistic, p_value))
    print("The null hypothesis is {}".format(
        "Accepted" if p_value > alpha else "Rejected"))
    print(
        "========================================================================"
    )

    print("One-Sided t-Test - H_0 : μ >= {} vs H_A : μ < {}".format(
        mu_0, mu_0))
    print("with x_var {}, s {}, n {}, α {} :\n".format(x_var, s, n, alpha))

    p_value = 1 - t.sf(t_statistic, n - 1)
    print("t-statistic : {:.4f}, p-value : P(T<=t) = {:.3f}".format(
        t_statistic, p_value))
    print("The null hypothesis is {}".format(
        "Accepted" if p_value > alpha else "Rejected"))
    print(
        "========================================================================"
    )

    if power is not None:
        raise NotImplementedError
        print("Power >= {} requires n >= {}".format(power, 1))
        print(
            "========================================================================"
        )
Example #36
def welch(n1, m1, var1, n2, m2, var2, alpha=0.05):
    _t = (m1 - m2) / np.sqrt(var1 / n1 + var2 / n2)
    var1_SE, var2_SE = var1 / n1, var2 / n2

    _v = (var1_SE + var2_SE)**2 / (var1_SE**2 / (n1 + 1) + var2_SE**2 /
                                   (n2 + 1)) - 2
    _p = t.sf(_t, _v)
    return _t, _v, _p
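This helper returns a one-sided p-value; a quick cross-check against scipy's Welch test from summary statistics (toy numbers; note the function uses Welch's original 1947 degrees-of-freedom formula, so _v differs slightly from scipy's Satterthwaite value):

import numpy as np
from scipy.stats import ttest_ind_from_stats

n1, m1, var1, n2, m2, var2 = 30, 5.2, 1.4, 25, 4.8, 2.1
t_ref, p_ref = ttest_ind_from_stats(m1, np.sqrt(var1), n1,
                                    m2, np.sqrt(var2), n2, equal_var=False)
print(welch(n1, m1, var1, n2, m2, var2))
print(t_ref, p_ref / 2)  # halved two-sided ~ one-sided, for positive t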
Example #37
def satterthwaite(n1, m1, var1, n2, m2, var2, alpha=0.05):
    _t = (m1 - m2) / np.sqrt(var1 / n1 + var2 / n2)
    var1_SE, var2_SE = var1 / n1, var2 / n2

    _v = (var1_SE + var2_SE)**2 / (var1_SE**2 / (n1 - 1) + var2_SE**2 /
                                   (n2 - 1))
    _p = t.sf(_t, _v)
    return _t, _v, _p
Example #38
 def pearsonr(self, x, y):
     n = len(x)
     if n == 0:
         return [None, None]
     r = (n * self.proSum(x, y) - self.summary(x) * self.summary(y)) / \
         (((n * self.squareSum(x) - self.summary(x)**2) *
           (n * self.squareSum(y) - self.summary(y)**2))**0.5)
     t = r * (float(n - 2) / (1 - r**2))**0.5
     p = 2 * T.sf(abs(t), n - 2)
     return [round(r, 6), round(p, 6)]
Example #39
def full_glm_results(endog_arr,
                     exog_vars,
                     return_resids=False,
                     only_tvals=False,
                     PCA_whiten=False,
                     ZCA_whiten=False,
                     orthogonalize=True,
                     orthogNear=False,
                     orthog_GramSchmidt=False):
    if np.mean(exog_vars[:, 0]) != 1:
        print(
            "Warning: the intercept is not included as the first column in your exogenous variable array"
        )
    n, num_depv = endog_arr.shape
    k = exog_vars.shape[1]

    if orthogonalize:
        exog_vars = sm.add_constant(orthog_columns(exog_vars[:, 1:]))
    elif orthogNear:
        exog_vars = sm.add_constant(ortho_neareast(exog_vars[:, 1:]))
    elif orthog_GramSchmidt:  # for when order matters AKA type 2 sum of squares
        exog_vars = sm.add_constant(gram_schmidt_orthonorm(exog_vars[:, 1:]))
    else:
        pass

    invXX = np.linalg.inv(np.dot(exog_vars.T, exog_vars))

    DFbetween = k - 1  # aka df model
    DFwithin = n - k  # aka df residuals
    DFtotal = n - 1
    if PCA_whiten:
        endog_arr = PCAwhiten(endog_arr)
    if ZCA_whiten:
        endog_arr = ZCAwhiten(endog_arr)

    a = cy_lin_lstsqr_mat(exog_vars, endog_arr)
    sigma2 = np.sum((endog_arr - np.dot(exog_vars, a))**2, axis=0) / (n - k)
    se = se_of_slope(num_depv, invXX, sigma2, k)

    if only_tvals:
        return a / se
    else:
        resids = endog_arr - np.dot(exog_vars, a)
        RSS = np.sum(resids**2, axis=0)
        TSS = np.sum((endog_arr - np.mean(endog_arr, axis=0))**2, axis=0)
        R2 = 1 - (RSS / TSS)

        std_y = np.sqrt(TSS / DFtotal)
        R2_adj = 1 - ((1 - R2) * DFtotal / (DFwithin))
        Fvalues = ((TSS - RSS) / (DFbetween)) / (RSS / DFwithin)
        Tvalues = a / se
        Pvalues = t.sf(np.abs(Tvalues), DFtotal) * 2
        if return_resids:
            fitted = np.dot(exog_vars, a)
            return (Fvalues, Tvalues, Pvalues, R2, R2_adj, np.array(resids),
                    np.array(fitted))
        else:
            return (Fvalues, Tvalues, Pvalues, R2, R2_adj)
Example #40
    def _overlap_output(self, category_names, overlap_matrix, M_annot, M_tot, print_coefficients):
        '''LD Score regression summary for overlapping categories.'''
        overlap_matrix_prop = np.zeros([self.n_annot,self.n_annot])
        for i in range(self.n_annot):
            overlap_matrix_prop[i, :] = overlap_matrix[i, :] / M_annot

        prop_hsq_overlap = np.dot(
            overlap_matrix_prop, self.prop.T).reshape((1, self.n_annot))
        prop_hsq_overlap_var = np.diag(
            np.dot(np.dot(overlap_matrix_prop, self.prop_cov), overlap_matrix_prop.T))
        prop_hsq_overlap_se = np.sqrt(
            np.maximum(0, prop_hsq_overlap_var)).reshape((1, self.n_annot))
        one_d_convert = lambda x: np.array(x).reshape(np.prod(x.shape))
        prop_M_overlap = M_annot / M_tot
        enrichment = prop_hsq_overlap / prop_M_overlap
        enrichment_se = prop_hsq_overlap_se / prop_M_overlap
        overlap_matrix_diff = np.zeros([self.n_annot,self.n_annot])
        for i in range(self.n_annot):
            if not M_tot == M_annot[0,i]:
                overlap_matrix_diff[i, :] = overlap_matrix[i,:]/M_annot[0,i] - \
                    (M_annot - overlap_matrix[i,:]) / (M_tot-M_annot[0,i])

        diff_est = np.dot(overlap_matrix_diff,self.coef)
        diff_cov = np.dot(np.dot(overlap_matrix_diff,self.coef_cov),overlap_matrix_diff.T)
        diff_se = np.sqrt(np.diag(diff_cov))
        diff_p = [np.nan if diff_se[i]==0 else 2*tdist.sf(abs(diff_est[i]/diff_se[i]),self.n_blocks) \
            for i in range(self.n_annot)]
        
        coef_z = []
        for i in range(self.n_annot):
            if one_d_convert(self.coef)[i]==0 and one_d_convert(self.coef_se)[i]==0:
                coef_z.append(0)
            elif one_d_convert(self.coef_se)[i]==0:
                coef_z.append('NA')
            else:
                coef_z.append(one_d_convert(self.coef)[i] / one_d_convert(self.coef_se)[i])

        df = pd.DataFrame({
            'Category': category_names,
            'Prop._SNPs': one_d_convert(prop_M_overlap),
            'Prop._h2': one_d_convert(prop_hsq_overlap),
            'Prop._h2_std_error': one_d_convert(prop_hsq_overlap_se),
            'Enrichment': one_d_convert(enrichment),
            'Enrichment_std_error': one_d_convert(enrichment_se),
            'Enrichment_p':diff_p,
            'Coefficient': one_d_convert(self.coef),
            'Coefficient_std_error': self.coef_se,
            'Coefficient_z-score': coef_z
        })
        if print_coefficients:
            df = df[['Category', 'Prop._SNPs', 'Prop._h2', 'Prop._h2_std_error',
                    'Enrichment','Enrichment_std_error', 'Enrichment_p',
                     'Coefficient', 'Coefficient_std_error','Coefficient_z-score']]
        else:
            df = df[['Category', 'Prop._SNPs', 'Prop._h2', 'Prop._h2_std_error',
                    'Enrichment','Enrichment_std_error', 'Enrichment_p']]
        return df
Example #41
    def reducer(self, key, info):
        '''
        Calculates all the values we will need for simple linear regression 
        analysis, and does the analysis itself.
        '''
        # not the most efficient, but we want to keep these values
        # to calculate standard errors
        info = list(info)

        # calculate sums
        sumx, sumy, sumxx, sumyy, sumxy, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0)
        for (x, y) in info:
            sumx += x
            sumy += y
            sumxx += x * x
            sumyy += y * y
            sumxy += x * y
            n += 1

        # calculate correlation
        corr = 0
        corr_denom = math.sqrt((n * sumxx - sumx**2) * (n * sumyy - sumy**2))
        if corr_denom < 0.0001:
            yield False, "Could not calculate coefficients"
            return

        corr_num = n * sumxy - sumx * sumy 
        corr = corr_num / corr_denom

        if abs(corr) < 0.0001:
            yield False, "Could not calculate coefficients"
            return

        # calculate regression coefficients
        beta1 = (sumxy - sumx * sumy / n) / (sumxx - sumx**2 / n)
        beta0 = (sumy - beta1 * sumx) / n

        # calculate standard errors
        # note: this is the reason why this isn't in a regression class
        x_reals = [x for (x, y) in info]
        y_reals = [y for (x, y) in info]
        y_hats = [beta0 + beta1 * x for x in x_reals]
        s_num = sum((y - yhat)**2 for (y, yhat) in zip(y_reals, y_hats))
        s = math.sqrt(s_num / (n - 2))

        se_denom = n * sumxx - sumx**2
        # se_beta0 = s * math.sqrt(sumxx / se_denom)
        se_beta1 = s * math.sqrt(n / se_denom)

        # calculate t-values
        # t0 = beta0 / se_beta0
        t1 = beta1 / se_beta1

        # calculate 2-sided p-values
        # alpha = 0.05
        # t_stat = t.ppf(1 - alpha/2, n - 2)
        # beta0_p_value = t.sf(abs(t0), n - 2) * 2
        beta1_p_value = t.sf(abs(t1), n - 2) * 2

        yield None, (beta1, beta1_p_value, corr)
Example #42
 def ttest_1samp(self, a, popmean):
     mean, s = 0.0, 0.0
     mean = sum(a) / float(len(a))
     for i in a:
         s += (i - mean)**2
     s /= (len(a) - 1)
     s = s**0.5
     T = (mean - popmean) / (s / (len(a))**0.5)
     P = t.sf(abs(T), len(a) - 1) * 2
     return [round(T, 6), round(P, 6)]
Example #43
def significantly_different_genes(
        rpkm_table,
        experiment_groups,
        intergroups,
        target_p_value=0.05):
    """
    Performs a test that uses the error function to determine if we can reject the hypothesis that
    all the genes are sampled from the same distribution.

    :param rpkm_table: table of the rpkm values
    :param experiment_groups: groups on indexes
    :param intergroups: the groups between which we want to do the comparisons
    :param target_p_value: p_value with which we want to be able to reject the null hypothesis
    """
    groups_means = np.zeros((rpkm_table.shape[0], len(experiment_groups)))
    groups_var = np.zeros((rpkm_table.shape[0], len(experiment_groups)))

    for i, group in enumerate(experiment_groups):
        groups_means[:, i] = np.mean(rpkm_table[:, group], axis=1)
        groups_var[:, i] = np.var(rpkm_table[:, group], axis=1) / \
            estimator_dilatation_table[len(group)] ** 2

    group_comparison = []
    for bi_group in intergroups:
        groups_mean_difference = np.fabs(
            groups_means[
                :,
                bi_group[0]] -
            groups_means[
                :,
                bi_group[1]])
        groups_combined_std = np.sqrt(
            groups_var[
                :,
                bi_group[0]] +
            groups_var[
                :,
                bi_group[1]])
        p_val = t.sf(groups_mean_difference /
                     groups_combined_std, (len(experiment_groups[bi_group[0]]) +
                                           len(experiment_groups[bi_group[1]])) /
                     2)
        sorted_p_vals = np.sort(p_val, axis=0)
        lower_index = np.array(range(0, sorted_p_vals.shape[0])) *\
            target_p_value / sorted_p_vals.shape[0]
        pre_filter_mask = sorted_p_vals <= lower_index
        filter_mask = pre_filter_mask
        if np.any(pre_filter_mask):
            refined_threshold = np.max(sorted_p_vals[pre_filter_mask])
            filter_mask = p_val < refined_threshold
        group_comparison.append((p_val, filter_mask))

    return group_comparison
Example #44
 def pVal(self):
     p = {}
     for name, sab in self.sab.items():
         ssa = self.ssa[name]
         ssb = self.ssb[name]
         dof = self.dof[name]
         r = CorrCurves.calc(sab, ssa, ssb)
         df = dof - 1
         t = r * np.sqrt(df/(1-r**2))
         rawP = tDist.sf(np.abs(t), df)
         p[name] = CorrCurves.bonferroni(rawP)
         
     return p
Example #45
 def pairedTTest(y1, y2):
     y1, y2 = array(y1), array(y2)
     n = len(y1)
     y_diff = y1 - y2
     y_diff_mean, yfcra_sd = mean(y_diff), std(y_diff)
     t = y_diff_mean / (yfcra_sd / sqrt(n))
     p = spt.sf(np.abs(t), n-1)
     y1_mean, y1_std = mean(y1), std(y1)
     y1_y1z = (y1 - y1_mean) / y1_std
     y2_y1z = (y2 - y1_mean) / y1_std
     #assert mean(y1_y1z) == 0.000, "y1 mean not zero, %.5f" % mean(y1_y1z) #will be close enough to zero
     d = mean(y2_y1z)
     return (t, p, d)
Example #46
 def different_stdev_explicite(self, alpha, y1, y2, S1, S2, n1, n2):
     t0 = (y1 - y2) / (np.sqrt(S1 ** 2 / n1 + S2 ** 2 / n2))
     # hypothesis testing2
     df = int((S1**2/n1+S2**2/n2)**2/((S1**2/n1)**2/(n1-1)+(S2**2/n2)**2/(n2-1)))
     H1a = t.ppf(1 - alpha/2., df) < np.abs(t0)
     H1b = t.ppf(1 - alpha, df) < t0
     H1c = t.ppf(alpha, df) > t0
     # p-value
     p1a = t.sf(np.abs(t0), df) * 2
     p1b = t.sf(t0, df)
     p1c = t.cdf(t0, df)
     c1 = y1 - y2 - t.ppf(1 - alpha/2., df) * np.sqrt(S1**2/n1+S2**2/n2)        
     c2 = y1 - y2 + t.ppf(1 - alpha/2., df) * np.sqrt(S1**2/n1+S2**2/n2)
      CI = np.array([c1, c2])
      print('at the level of significance', alpha, ':')
      print('H1 mu1 != mu2 is', H1a)
      print('H1 mu1 > mu2 is', H1b)
      print('H1 mu1 < mu2 is', H1c)
      print('probability of type I error for mu1 != mu2:', p1a)
      print('probability of type I error for mu1 > mu2:', p1b)
      print('probability of type I error for mu1 < mu2:', p1c)
      print('CI (%.1f%%) for mu1 - mu2:' % (100 - 100 * alpha), CI, CI / y1)
Example #47
 def __init__(self, t=None, F=None, sd=None, effect=None, df_denom=None,
              df_num=None):
     if F is not None:
         self.fvalue = F
         self.df_denom = df_denom
         self.df_num = df_num
         self.pvalue = fdist.sf(F, df_num, df_denom)
     else:
         self.tvalue = t
         self.sd = sd
         self.effect = effect
         self.df_denom = df_denom
         self.pvalue = student_t.sf(np.abs(t), df_denom)
Example #48
def show_correlation_coefficient_stats():
    # Get the values from the entries in the window
    try:
        correlation_coefficient = float(enter_coefficient.get())
    except Exception:
        correlation_coefficient = ''
    try:
        number_of_samples = int(enter_number_of_samples.get())
    except Exception:
        number_of_samples = ''
    try:
        tails = int(enter_tails.get())
    except Exception:
        tails = ''
    try:
        correlation_type = enter_correlation_type.get()
    except Exception:
        correlation_type = ''
    try:
        level_of_significance = float(enter_level_of_significance.get())
    except Exception:
        level_of_significance = ''
    # Fix the values
    if tails == '':
        tails = 2
    if correlation_type == '':
        correlation_type = 'pearson'
    if level_of_significance == '':
        level_of_significance = 0.05
    # Return the alarm
    if (correlation_coefficient == '' or number_of_samples == ''):
        messagebox.showwarning(title="Error", message="Missing critical values!")
    else:
        ####### Calculation of the Student's t-distribution
        degrees_of_freedom = number_of_samples-2
        if correlation_type == "pearson":
            t_value = correlation_coefficient * sqrt((number_of_samples-2)/(1-correlation_coefficient**2))
            # Calculate the one-tail p-value
            p_value = t.sf(t_value, degrees_of_freedom)
            if tails == 1:
                messagebox.showinfo(title="Correlation p-value", message="The p-value for a %s correlation coefficient (of %s) computed on %s samples is: %s" %(correlation_type, correlation_coefficient, number_of_samples, p_value))
            # Calculate the two-tail p-value
            elif tails == 2:
                p_value = p_value*2
                messagebox.showinfo(title="Correlation p-value", message="The p-value for a %s correlation coefficient (of %s) computed on %s samples is: %s" %(correlation_type, correlation_coefficient, number_of_samples, p_value))
            ###################### Significance
            if p_value <= level_of_significance:
                messagebox.showinfo(title="Significance", message="The calculated correlation coefficient IS statistically significant at a level of significance of %s" %(level_of_significance))
            else:
                messagebox.showinfo(title="Significance", message="The calculated correlation coefficient is NOT statistically significant at a level of significance of %s" %(level_of_significance))
Example #49
    def _overlap_output(self, category_names, overlap_matrix, M_annot, M_tot, print_coefficients):
        '''LD Score regression summary for overlapping categories.'''
        overlap_matrix_prop = np.zeros([self.n_annot,self.n_annot])
        for i in range(self.n_annot):
            overlap_matrix_prop[i, :] = overlap_matrix[i, :] / M_annot

        prop_hsq_overlap = np.dot(
            overlap_matrix_prop, self.prop.T).reshape((1, self.n_annot))
        prop_hsq_overlap_var = np.diag(
            np.dot(np.dot(overlap_matrix_prop, self.prop_cov), overlap_matrix_prop.T))
        prop_hsq_overlap_se = np.sqrt(
            np.maximum(0, prop_hsq_overlap_var)).reshape((1, self.n_annot))
        one_d_convert = lambda x: np.array(x).reshape(np.prod(x.shape))
        prop_M_overlap = M_annot / M_tot
        enrichment = prop_hsq_overlap / prop_M_overlap
        enrichment_se = prop_hsq_overlap_se / prop_M_overlap
        overlap_matrix_diff = np.zeros([self.n_annot,self.n_annot])
        for i in range(self.n_annot):
            if not M_tot == M_annot[0,i]:
                overlap_matrix_diff[i, :] = overlap_matrix[i,:]/M_annot[0,i] - \
                    (M_annot - overlap_matrix[i,:]) / (M_tot-M_annot[0,i])

        diff_est = np.dot(overlap_matrix_diff,self.coef)
        diff_cov = np.dot(np.dot(overlap_matrix_diff,self.coef_cov),overlap_matrix_diff.T)
        diff_se = np.sqrt(np.diag(diff_cov))
        diff_p = ['NA' if diff_se[i]==0 else 2*tdist.sf(abs(diff_est[i]/diff_se[i]),self.n_blocks) \
            for i in range(self.n_annot)]

        df = pd.DataFrame({
            'Category': category_names,
            'Prop._SNPs': one_d_convert(prop_M_overlap),
            'Prop._h2': one_d_convert(prop_hsq_overlap),
            'Prop._h2_std_error': one_d_convert(prop_hsq_overlap_se),
            'Enrichment': one_d_convert(enrichment),
            'Enrichment_std_error': one_d_convert(enrichment_se),
            'Enrichment_p':diff_p,
            'Coefficient': one_d_convert(self.coef),
            'Coefficient_std_error': self.coef_se,
            'Coefficient_z-score': one_d_convert(self.coef) / one_d_convert(self.coef_se)
        })
        if print_coefficients:
            df = df[['Category', 'Prop._SNPs', 'Prop._h2', 'Prop._h2_std_error',
                    'Enrichment','Enrichment_std_error', 'Enrichment_p',
                     'Coefficient', 'Coefficient_std_error','Coefficient_z-score']]
        else:
            df = df[['Category', 'Prop._SNPs', 'Prop._h2', 'Prop._h2_std_error',
                    'Enrichment','Enrichment_std_error', 'Enrichment_p']]
        return df
Example #50
def pers(x, y):
    assert len(x) == len(y)

    x_bar = mean(x)
    y_bar = mean(y)
    s_x = std(x, ddof=1)
    s_y = std(y, ddof=1)
    tmp = 0.0
    for i in range(0, len(x)):
        tmp += (x[i] - x_bar) * (y[i] - y_bar)

    r = tmp / (len(x) - 1) / s_x / s_y
    if abs(r) == 1:  # r = +/-1 would make the t denominator zero
        return [r, 0]
    tt = r * sqrt((len(x) - 2) / (1 - r ** 2))
    p = t.sf(abs(tt), len(x) - 2) * 2
    return [r, p]
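A quick check against scipy (toy lists; pers above relies on mean/std/sqrt and scipy.stats.t being imported in its module):

from scipy.stats import pearsonr

x = [1.0, 2.0, 3.0, 4.0, 5.0]
y = [2.1, 3.9, 6.2, 8.1, 9.8]
print(pers(x, y))      # [r, p]
print(pearsonr(x, y))  # should agree closely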
Example #51
File: t.py Project: ronrest/pyrpy
def pt(x, df=1, loc=0, scale=1, ncp=None, lowertail=True, log=False):
    """
    The cumulative distribution function for the t distribution.
    You provide a value along the t distribution (eg x=3) or array of
    values, and it returns what proportion of values lie below it (the quantile)

    Alternatively, if you select lowertail=False, it returns the proportion of
    values that are above it.

    ARGS:
    ---------------
    :param x (float, array of floats):
        The values along the distribution.
    :param df (float):
        degrees of freedom
    :param loc: array_like, optional
        location parameter (default=0)
    :param scale: float, optional
        scale (default=1)
    :param ncp (float):
        non-centrality parameter delta.
        Currently not implemented.
    :param lowertail (bool):
        are you interested in what proportion of values lie beneath x? or
        above x (False)?
    :param log (boolean):
        Use log scale?

    RETURN:
    ---------------
    :return:
        an array of quantiles() corresponding to the values in x
    """
    if lowertail and not log:
        return t.cdf(x, df=df, loc=loc, scale=scale)
    elif not lowertail and not log:
        return t.sf(x, df=df, loc=loc, scale=scale)
    elif lowertail and log:
        return t.logcdf(x, df=df, loc=loc, scale=scale)
    else:
        return t.logsf(x, df=df, loc=loc, scale=scale)
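Usage mirrors R's pt() (a sketch; values illustrative, assumes the pt function above is in scope):

print(pt(2.0, df=10))                   # P(T <= 2.0), lower tail, as in R
print(pt(2.0, df=10, lowertail=False))  # upper tail, same as t.sf(2.0, 10)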
Example #52
import numpy as np
from scipy.stats import t


def approximate_MH_accept(mu_0, log_lik, X, batch_size, epsilon,
                          theta_prime, theta_t, N):
    """Sequential approximate Metropolis-Hastings test: grow the minibatch
    until a t-test on the mean log-likelihood difference is conclusive."""
    iteration_number = 0

    while True:
        iteration_number += 1
        n = min(iteration_number * batch_size, N)
        sub = np.random.choice(X, n, replace=False)
        sub = log_lik(sub, theta_prime) - log_lik(sub, theta_t)
        l_hat = np.mean(sub)
        l_2_hat = np.mean(sub ** 2)
        # sample standard deviation with Bessel's correction ...
        s_l = np.sqrt((l_2_hat - l_hat ** 2) * n / (n - 1))
        # ... and standard error with a finite-population correction;
        # at n == N the correction is zero, which forces a decision
        s = s_l / np.sqrt(n) * np.sqrt(1 - (n - 1) / (N - 1))
        t_students_var = (l_hat - mu_0) / s
        stat = np.abs(t_students_var)
        delta = t.sf(stat, n - 1)
        if delta < epsilon:
            if l_hat > mu_0:
                return True, n
            return False, n
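A toy invocation under a Gaussian model; the log-likelihood and data below are
illustrative assumptions, not part of the original project:

import numpy as np

def gauss_log_lik(x, theta):
    # per-point Gaussian log-likelihood with unit variance (up to a constant)
    return -0.5 * (x - theta) ** 2

X = np.random.normal(1.0, 1.0, size=2000)
accept, n_used = approximate_MH_accept(
    mu_0=0.0, log_lik=gauss_log_lik, X=X, batch_size=50,
    epsilon=0.05, theta_prime=1.0, theta_t=0.0, N=len(X))
print(accept, n_used)  # typically True here, using only a fraction of X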
Example #53
 def test_pvalue(self):
     assert_almost_equal(self.Ttest.pvalue, student_t.sf(
                     np.abs(self.res1.tvalues), self.res1.model.df_resid)*2,
                     DECIMAL_4)
Example #54
 def pvalues(self):
 #TODO: same for conditional and unconditional?
     df_resid = self.df_resid
     return t.sf(np.abs(self.tvalues), df_resid) * 2
Example #55
 def pvalues(self):
     return t.sf(np.abs(self.tvalues), self.df_resid)*2
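Both methods above implement the standard two-sided p-value, 2*sf(|t|, df). A
quick numeric check of the identity against the cdf form:

import numpy as np
from scipy.stats import t

tvalues, df_resid = np.array([-2.3, 0.4, 1.9]), 25
p = t.sf(np.abs(tvalues), df_resid) * 2
assert np.allclose(p, 2 * (1 - t.cdf(np.abs(tvalues), df_resid)))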
Example #56
import mne
import numpy as np
from scipy.stats import t

# group, seed_src, band and fdr are set earlier in the original script
subjs_fname = "/Users/sudregp/data/meg/good_subjects.txt"
group_fname = "/Users/sudregp/data/meg/%s_subjs.txt" % group
data_dir = "/Users/sudregp/data/results/meg/"
fid = open(subjs_fname, "r")
subjs = [line.rstrip() for line in fid]
fid.close()
fid = open(group_fname, "r")
this_group = [line.rstrip() for line in fid]
fid.close()

# load the pre-computed correlation data
fname = data_dir + "corrs-seed%d-%dto%d-lh.stc" % (seed_src, band[0], band[1])
stc = mne.read_source_estimate(fname)

y = [s in this_group for s in subjs]
y = np.asarray(y).T
X = np.mean(stc.data[:, y], axis=1)

print "Subjects in %s: %d" % (group, np.sum(y))
if fdr > 0:
    n = sum(y)
    # from http://www.danielsoper.com/statcalc3/calc.aspx?id=44
    tstat = X / np.sqrt((1 - X ** 2) / (n - 2))
    # t.sf gives the one-tailed version; use |t| with n - 2 df for the two-sided test
    pval = t.sf(np.abs(tstat), n - 2) * 2
    reject_fdr, pval_fdr = mne.stats.fdr_correction(pval, alpha=fdr, method="indep")
    X[~reject_fdr] = 0

stc2 = mne.SourceEstimate(X[:, None], vertices=stc.vertno, tmin=0, tstep=0, subject="fsaverage")
brain = stc2.plot(hemi="both", fmin=min(X), fmid=(min(X) + (max(X) - min(X)) / 2), fmax=max(X))
Example #57
 def pval(x, standard_error, df=800, tail=2):
     pval = t.sf(np.abs((x-0)/standard_error), df) * tail
     return pval
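For instance, a coefficient of 1.2 with a standard error of 0.5 gives t = 2.4:

print(pval(1.2, 0.5))          # two-tailed p-value with the default df=800
print(pval(1.2, 0.5, tail=1))  # one-tailed version, half the value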
Example #58
    def _p_value_raw(self):
        """Returns the raw p values."""
        from scipy.stats import t

        return 2 * t.sf(np.fabs(self._t_stat_raw),
                        self._df_resid_raw)
Example #59
from numpy import (shape, var, mean, log, exp, sqrt, vstack, ones, zeros,
                   diag, dot, inf, pi)
from numpy.linalg import inv, norm
from scipy.sparse import diags
from scipy.special import fdtrc
from scipy.stats import t


def glm(x, y, w=1.0):

  p,n    = shape(x)                    # number of predictors, sample size
  p     += 1                           # add one for the intercept
  dof    = n - p                       # degrees of freedom

  sig    = var(y)                      # variance of the response
  mu     = (y + mean(y))/2.0           # initial mean estimate
  eta    = log(mu)                     # initial linear predictor
  X      = vstack((ones(n), x)).T      # design matrix with intercept column

  # Newton-Raphson :
  converged = False
  rtol      = 1e-12
  dtol      = 1e-12
  lmbda     = 1.0
  nIter     = 0
  deviance  = 1
  D         = 1
  ahat      = zeros(p)   # initial parameters
  rel_res   = zeros(p)   # initial relative residual
  maxIter   = 100

  rel_a = []
  dev_a = []

  while not converged and nIter < maxIter:
    W       = diags(w*mu**2/sig, 0)         # compute weights
    z       = eta + (y - mu)/mu             # adjusted dependent variable

    WX      = W.dot(X)
    XTWX    = dot(X.T, WX)
    iXTWX   = inv(XTWX)
    Wz      = W.dot(z)

    ahat_n  = dot(iXTWX, dot(X.T, Wz))
    
    eta     = dot(X, ahat_n)               # updated linear predictor
    mu      = exp(eta)                     # fitted mean (inverse log link)

    # calculate residual :
    rel_res  = norm(ahat - ahat_n, inf)
    rel_a.append(rel_res)
    ahat     = ahat_n

    D_n      = sum((y - mu)**2)
    deviance = abs(D_n - D)
    D        = D_n
    dev_a.append(deviance)
    
    if rel_res < rtol or deviance < dtol: converged = True
    nIter +=  1

    string = "Newton iteration %d: d (abs) = %.2e, (tol = %.2e) r (rel) = %.2e (tol = %.2e)"
    print string % (nIter, deviance, dtol, rel_res, rtol)
  
  # calculate statistics :
  varA   = diag(iXTWX)            # variance of alpha hat
  sea    = sqrt(varA)             # vector of standard errors for alpha hat
  t_a    = ahat / sea
  pval   = t.sf(abs(t_a), dof) * 2
  conf   = 0.95                            # 95% confidence interval
  tbonf  = t.ppf(1 - (1-conf)/(2*p), dof)  # Bonferroni-corrected two-sided t-value
  ci     = tbonf*sea                       # confidence-interval half-width for ahat
  resid  = (y - mu)                    # 'working' residual
                                       
  RSS    = sum((y - mu)**2)            # residual sum of squares
  TSS    = sum((y - mean(y))**2)       # total sum of squares
  R2     = (TSS-RSS)/TSS               # R2
  F      = (TSS-RSS)/(p-1) * (n-p)/RSS # F-statistic
  F_p    = fdtrc(p-1, dof, F)          # F-Stat. p-value

  # log-likelihood :
  L      = sum((y*mu - mu**2/2)/(2*sig) - y**2/(2*sig) - 0.5*log(2*pi*sig))
  AIC    = (-2*L + 2*p)/n              # AIC statistic

  # estimated error variance :
  sighat = 1/(n-p) * RSS
                                        
  vara = { 'ahat'  : ahat,              
           'yhat'  : mu,                
           'sea'   : sea,               
           'ci'    : ci,                
           'dof'   : dof,               
           'resid' : resid,             
           'rel_a' : rel_a,
           'dev_a' : dev_a,
           'R2'    : R2,
           'F'     : F,
           'AIC'   : AIC,
           'sighat': sighat}
  return vara
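A minimal smoke test for the routine above, with a single predictor and synthetic
log-linear data (the data and seed are purely illustrative):

import numpy as np

np.random.seed(0)
x = np.random.uniform(0.0, 1.0, size=(1, 200))               # shape (p, n)
y = np.exp(1.0 + 2.0*x[0]) + np.random.normal(0, 0.1, 200)   # strictly positive response
out = glm(x, y)
print(out['ahat'])  # should land close to [1.0, 2.0]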
Example #60
#     (2) Divide each group's variance by the number of observations minus one (n-1 = 9):
fc_syn_ctl_a = fc_syn_ctl_var / 9.0
fc_syn_dms_a = fc_syn_dms_var / 9.0
fc_fmri_ctl_a = fc_fmri_ctl_var / 9.0
fc_fmri_dms_a = fc_fmri_dms_var / 9.0
#     (3) Add results obtained for CTL and DMS in step (2) together:
fc_syn_a = fc_syn_ctl_a + fc_syn_dms_a
fc_fmri_a= fc_fmri_ctl_a+ fc_fmri_dms_a
#     (4) Take the square root the results in step (3):
sqrt_fc_syn_a = np.sqrt(fc_syn_a)
sqrt_fc_fmri_a= np.sqrt(fc_fmri_a)
#     (5) Divide the results of step (1) by the results of step (4) to obtain 't':
fc_syn_t = fc_syn_mean_diff  / sqrt_fc_syn_a
fc_fmri_t= fc_fmri_mean_diff / sqrt_fc_fmri_a
#     (6) Calculate the degrees of freedom (add up number of observations for each group
#         minus number of groups):
dof = 10 + 10 - 2
#     (7) find the p-values for the above 't' and 'degrees of freedom':
fc_syn_p_values  = t.sf(fc_syn_t, dof)
fc_fmri_p_values = t.sf(fc_fmri_t, dof)

print('t-values for synaptic activity correlations: ', fc_syn_t)
print('t-values for fmri time-series correlations: ', fc_fmri_t)
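
The same unpaired two-sample test (equal group sizes, dof = 18) can be cross-checked
with scipy; halving the two-sided p-value recovers the one-tailed values above. A
sketch, assuming the raw per-group samples are available as hypothetical arrays
group_dms and group_ctl:

from scipy.stats import ttest_ind

# group_dms, group_ctl: hypothetical arrays of 10 observations each
tval, p_two_sided = ttest_ind(group_dms, group_ctl)
p_one_tailed = p_two_sided / 2.0  # equals t.sf(tval, 18) when tval > 0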

# convert to Pandas dataframe, using the transpose to convert to a format where the names
# of the modules are the labels for each time-series
fc_mean = pd.DataFrame(np.array([fc_syn_dms_mean, fc_syn_ctl_mean,
                                 fc_fmri_dms_mean, fc_fmri_ctl_mean]),
                      columns=np.array(['V1', 'V4', 'FS', 'D1', 'D2', 'FR', 'LIT']),
                       index=np.array(['DMS-syn', 'CTL-syn', 'DMS-fmri', 'CTL-fmri']))
#fc_std  = pd.DataFrame(np.array([fc_syn_dms_std, fc_syn_ctl_std,
#                                 fc_fmri_dms_std, fc_fmri_ctl_std]),
#                      columns=np.array(['V1', 'V4', 'D1', 'D2', 'FS', 'FR']),
#                       index=np.array(['DMS-syn', 'CTL-syn', 'DMS-fmri', 'CTL-fmri']))