Пример #1
0
def t_test(data1, data2=None, tail='both', mu=0, equal=True):
    """Run a one-sample or two-sample t-test.

    With only ``data1`` the statistic compares ``mean(data1)`` against
    ``mu``. With ``data2`` it compares ``mean(data1) - mean(data2)``
    against ``mu``: a pooled standard deviation is used when ``equal`` is
    true, otherwise the Welch form with Welch-Satterthwaite degrees of
    freedom.

    ``tail`` selects the p-value: 'both' (two-sided), 'left' (lower tail)
    or 'right' (upper tail).

    Returns the tuple ``(t_val, df, p)``.
    """
    assert tail in ('both', 'left', 'right'), \
        'tail should be one of "both","left","right"'

    if data2 is not None:
        size1, size2 = len(data1), len(data2)
        gap = mean(data1) - mean(data2)
        var1 = variance(data1)
        var2 = variance(data2)
        if not equal:
            # Per-sample variance of the mean; shared by the standard
            # error and the degrees-of-freedom formula.
            part1 = var1/size1
            part2 = var2/size2
            t_val = (gap - mu)/sqrt(part1 + part2)
            df = (part1 + part2)**2 / \
                (part1**2/(size1 - 1) + part2**2/(size2 - 1))
        else:
            df = size1 + size2 - 2
            pooled_sd = sqrt(((size1 - 1)*var1 + (size2 - 1)*var2)/df)
            t_val = (gap - mu)/(pooled_sd*sqrt(1/size1 + 1/size2))
    else:
        centre = mean(data1)
        std_err = std(data1)/sqrt(len(data1))
        t_val = (centre - mu)/std_err
        df = len(data1) - 1

    if tail == "both":
        return t_val, df, 2*(1 - t.cdf(abs(t_val), df))
    lower_area = t.cdf(t_val, df)
    return t_val, df, (lower_area if tail == "left" else 1 - lower_area)
Пример #2
0
def mean_one_sided_lower_ci_est(data, alpha, sigma=None):
    """Return a one-sided confidence interval ``(lower, inf)`` for the mean.

    When ``sigma`` is None the bound uses ``std(data)`` and a t quantile
    with ``len(data) - 1`` degrees of freedom; otherwise it uses the given
    ``sigma`` and a standard normal quantile. The quantile is taken at
    ``alpha`` and used in absolute value.
    """
    size = len(data)
    if sigma is not None:
        # Population sigma supplied: standard normal quantile.
        critical = abs(norm.ppf(alpha))
        return mean(data) - sigma/sqrt(size) * critical, inf

    # Population sigma unknown: t quantile with the sample estimate.
    critical = abs(t.ppf(alpha, size - 1))
    sample_sd = std(data)
    return mean(data) - sample_sd / sqrt(size) * critical, inf
Пример #3
0
def mean_ci_est(data, alpha, sigma=None):
    """Return a two-sided confidence interval ``(lower, upper)`` for the mean.

    If ``sigma`` is None, the standard error is built from ``std(data)``
    and the critical value is a t quantile with ``len(data) - 1`` degrees
    of freedom; otherwise ``sigma`` and a standard normal quantile are used.
    """
    size = len(data)
    centre = mean(data)

    if sigma is None:
        std_err = std(data)/sqrt(size)
        quantile = t.ppf(alpha/2, size-1)
    else:
        std_err = sigma/sqrt(size)
        quantile = norm.ppf(alpha / 2)

    # ppf returns the lower-tail quantile, so its magnitude is used.
    margin = std_err * abs(quantile)
    return centre - margin, centre + margin
Пример #4
0
def mean_ci_est(data, alpha, sigma=None):
    """Estimate a two-sided confidence interval for the mean of ``data``.

    ``alpha`` is split evenly between the two tails. Pass ``sigma`` to use
    a standard normal critical value; leave it as None to use ``std(data)``
    with a t critical value on ``len(data) - 1`` degrees of freedom.

    Returns ``(lower, upper)``.
    """
    count = len(data)
    midpoint = mean(data)

    if sigma is not None:
        # Variance known.
        half_width = sigma/sqrt(count) * abs(norm.ppf(alpha/2))
        return midpoint - half_width, midpoint + half_width

    # Variance unknown.
    sample_sd = std(data)
    half_width = sample_sd/sqrt(count) * abs(t.ppf(alpha/2, count-1))
    return midpoint - half_width, midpoint + half_width
Пример #5
0
def mean_ci_est(data, alpha, sigma=None):
    """Interval estimate of the mean, with both bounds rounded to 2 decimals.

    With ``sigma`` left as None the spread is ``std(data)`` and the
    critical value is a t quantile on ``len(data) - 1`` degrees of freedom;
    otherwise the given ``sigma`` and a standard normal quantile are used.

    Returns ``(lower, upper)``.
    """
    n = len(data)
    sample_mean = mean(data)

    if sigma is None:
        # Variance unknown.
        spread, quantile = std(data), t.ppf(alpha/2, n-1)
    else:
        # Variance known.
        spread, quantile = sigma, norm.ppf(alpha/2)

    half_width = spread / np.sqrt(n) * abs(quantile)
    lower = round(sample_mean - half_width, 2)
    upper = round(sample_mean + half_width, 2)
    return lower, upper
def t_test(data1, data2=None, tail='both', mu=0.0, equal=True):
    """Run a one-sample or two-sample t-test.

    Args:
        data1: First sample.
        data2: Optional second sample. When None, a one-sample test of
            ``mean(data1)`` against ``mu`` is performed.
        tail: 'both' for a two-sided p-value, 'left' for the lower tail,
            'right' for the upper tail.
        mu: Hypothesised mean (one sample) or hypothesised difference of
            means (two samples).
        equal: For two samples, use the pooled-variance statistic when
            true, otherwise the Welch statistic with Welch-Satterthwaite
            degrees of freedom.

    Returns:
        Tuple ``(t_val, df, p)`` where ``t_val`` and ``df`` are rounded to
        two decimals and ``p`` is left unrounded.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.
    """
    assert tail in ['both', 'left',
                    'right'], 'tail should be one of "both", "left", "right"'

    if data2 is None:
        # One-sample case.
        mean_val = mean(data1)
        se = std(data1) / np.sqrt(len(data1))
        t_val = (mean_val - mu) / se
        df = len(data1) - 1
    else:
        # Two-sample case.
        n1 = len(data1)
        n2 = len(data2)
        mean_diff = mean(data1) - mean(data2)
        sample1_var = variance(data1)
        sample2_var = variance(data2)
        if equal:
            # Equal variances: pooled standard deviation.
            sw = np.sqrt(((n1 - 1) * sample1_var + (n2 - 1) * sample2_var) /
                         (n1 + n2 - 2))
            # Each sample contributes its own size to the standard error.
            t_val = (mean_diff - mu) / (sw * np.sqrt(1 / n1 + 1 / n2))
            df = n1 + n2 - 2
        else:
            # Unequal variances: Welch statistic.
            se = np.sqrt(sample1_var / n1 + sample2_var / n2)
            t_val = (mean_diff - mu) / se
            df = (sample1_var / n1 + sample2_var / n2)**2 / (
                (sample1_var / n1)**2 / (n1 - 1) + (sample2_var / n2)**2 /
                (n2 - 1))

    if tail == 'both':
        # Two-sided test.
        p = 2 * (1 - t.cdf(abs(t_val), df))
    elif tail == 'left':
        # Lower-tail test.
        p = t.cdf(t_val, df)
    else:
        # Upper-tail test.
        p = 1 - t.cdf(t_val, df)

    return round(t_val, 2), round(df, 2), p
def mean_ci_est(data, alpha, sigma=None):
    """Return a two-sided confidence interval for the mean.

    Args:
        data: The sample.
        alpha: Total tail probability, split evenly between both tails.
        sigma: Population standard deviation if known. When None, the
            interval uses ``std(data)`` and a t quantile on
            ``len(data) - 1`` degrees of freedom; otherwise it uses
            ``sigma`` and a standard normal quantile.

    Returns:
        Tuple ``(lower, upper)``.
    """
    n = len(data)  # sample size
    sample_mean = mean(data)  # sample mean

    if sigma is None:  # variance unknown
        s = std(data)  # standard deviation estimated from the sample
        se = s / sqrt(n)  # standard error
        t_value = abs(t.ppf(alpha / 2, n - 1))  # t critical value
        return sample_mean - se * t_value, sample_mean + se * t_value
    else:  # variance known
        se = sigma / sqrt(n)  # standard error
        # ppf is indexed by lower-tail area, so alpha/2 yields a negative
        # quantile; its absolute value is the critical value.
        z_value = abs(norm.ppf(alpha / 2))
        return sample_mean - se * z_value, sample_mean + se * z_value
Пример #8
0
    # Exercise frequency()
    print(frequency(data))

    # Exercise mode()
    print(mode(data))

    # Exercise median()
    print(median(data))

    # Exercise mean()
    print(mean(data))

    # Exercise rng() (range of the data)
    print(rng(data))

    # Exercise quartile()
    print(quartile(data))

    # Exercise variance()
    print(variance(data))

    # Exercise std() (standard deviation)
    print(std(data))






        总体均值未知,求方差的置信空间
        data为传入的样本; alpha为需要传入的置信水平的值
    """
    n = len(data)  # sample size
    s2 = variance(data)  # sample variance

    chi2_lower_value = chi2.ppf(
        alpha / 2, n - 1)  # lower chi-square quantile; ppf is indexed by left-tail area, whereas textbook derivations usually use the right tail
    chi2_upper_value = chi2.ppf(1 - alpha / 2, n - 1)  # upper chi-square quantile
    return (n - 1) * s2 / chi2_upper_value, (n - 1) * s2 / chi2_lower_value


if __name__ == '__main__':
    # Monthly income samples for 18-year-olds and 35-year-olds.
    salary_18 = [1484, 785, 1598, 1366, 1716, 1020, 1716, 785, 3113, 1601]
    salary_35 = [902, 4508, 3809, 3923, 4276, 2065, 1601, 553, 3345, 2182]

    # Mean: point estimate followed by the interval estimate.
    for sample in (salary_18, salary_35):
        print(mean(sample))
        print(mean_ci_est(sample, 0.05))
    print()

    # Spread: standard deviation, variance point estimate, then the
    # interval estimate of the variance.
    for sample in (salary_18, salary_35):
        print(std(sample))
        print(variance(sample))
        print(var_ci_est(sample, 0.05))
    print()