Example #1
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal

from statsmodels.stats.gof import chisquare_effectsize


def test_chisquare_effectsize():

    pr1 = np.array([1020, 690, 510, 420, 360])
    pr2 = np.array([1050, 660, 510, 420, 360])
    #> library(pwr)
    #> ES.w1(pr1/3000, pr2/3000)
    es_r = 0.02699815282115563
    es1 = chisquare_effectsize(pr1, pr2)
    es2 = chisquare_effectsize(pr1, pr2, cohen=False)
    assert_almost_equal(es1, es_r, decimal=14)
    assert_almost_equal(es2, es_r**2, decimal=14)

    # regression tests for correction
    res1 = chisquare_effectsize(pr1,
                                pr2,
                                cohen=False,
                                correction=(3000, len(pr1) - 1))
    res0 = 0  # bias correction would give -0.00059994422693327625, clipped to zero
    assert_equal(res1, res0)
    pr3 = pr2 + [0, 0, 0, 50, 50]
    res1 = chisquare_effectsize(pr1,
                                pr3,
                                cohen=False,
                                correction=(3000, len(pr1) - 1))
    res0 = 0.0023106468846296755
    assert_almost_equal(res1, res0, decimal=14)
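For reference, Cohen's w can be reproduced directly from the two sets of proportions, matching the R pwr::ES.w1 cross-check in the comments above. A minimal sketch using only numpy:

import numpy as np

p0 = np.array([1020, 690, 510, 420, 360]) / 3000
p1 = np.array([1050, 660, 510, 420, 360]) / 3000
# Cohen's w = sqrt(sum((p1 - p0)**2 / p0))
w = np.sqrt(((p1 - p0)**2 / p0).sum())
print(w)  # ~0.026998, the es_r value asserted above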
Example #2
import numpy as np

from statsmodels.stats import gof
from statsmodels.stats import power


def calc_chisquared_sample_size(
        baseline_conversion_rate_percentage: np.float64,
        expected_uplift_percentage: np.float64,
        power_percentage: np.float64 = 80,
        confidence_level_percentage: np.float64 = 95) -> np.float64:
    """Estimates the minimum sample size when the KPI is conversion rate.

  Estimated sample size using the Chi-squared test of proportions is the
    minimum required for either a Test or a Control group in an A/B test.

  Args:
    baseline_conversion_rate_percentage: Baseline conversion rate as a
      percentage.
    expected_uplift_percentage: Expected uplift of the media experiment on the
      baseline conversion rate as a percentage.
    power_percentage: Statistical power of the Chi-squared test as a percentage.
    confidence_level_percentage: Statistical confidence level of the Chi-squared
      test as a percentage.

  Returns:
    sample_size: Estimated minimum sample size required for either a Test or
      a Control group.
  """
    null_probability = baseline_conversion_rate_percentage / 100
    alternative_probability = (null_probability *
                               (100 + expected_uplift_percentage) / 100)
    alpha_proportion = (100 - confidence_level_percentage) / 100
    power_proportion = power_percentage / 100

    effect_size = gof.chisquare_effectsize(
        probs0=[null_probability, 1 - null_probability],
        probs1=[alternative_probability, 1 - alternative_probability],
        correction=None,
        cohen=True,
        axis=0)
    power_test = power.GofChisquarePower()
    sample_size = power_test.solve_power(effect_size=effect_size,
                                         nobs=None,
                                         alpha=alpha_proportion,
                                         power=power_proportion,
                                         n_bins=2)

    return np.ceil(sample_size)
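A quick usage sketch with hypothetical inputs (a 5% baseline conversion rate and a 10% expected relative uplift, keeping the default power and confidence level):

# Hypothetical example values, not taken from the original source.
min_sample_size = calc_chisquared_sample_size(
    baseline_conversion_rate_percentage=5.0,
    expected_uplift_percentage=10.0)
print(min_sample_size)  # minimum observations required per group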
Example #3
# Note: `gof` below is assumed to be statsmodels.stats.gof; `tr`, `log`, and
# `source` are project-specific helpers (angle transforms, logging, data access).
def log_results(log, result, source):
    """
    Log the fitting results.

    Notes
    -----
    The resulting mixture parameters are stored into a 2d array with rows
    [location in degrees (mu), shape (kappa), probability].
    """
    sparams = result.model.get_summary_params(result.full_params)[:, [1, 0, 2]]
    sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]),
                                        neg_shift=source.neg_shift)
    converged = result.mle_retvals['converged']

    fit_criteria = [-result.llf, result.aic, result.bic]
    print('llf / nobs:', fit_criteria[0] / result.model.endog.shape[0])

    chisquare = result.gof_chisquare()

    # Chisquare test with effect size.
    alpha = 0.05 # Significance level.
    data = source.source_data.data
    n_obs = data[:, 1].sum()
    rad_diff = data[1, 0] - data[0, 0]

    pdf = result.model.pdf_mix(result.full_params, data[:, 0])
    probs = pdf * rad_diff * n_obs
    effect_size = gof.chisquare_effectsize(data[:, 1], probs)
    chi2 = gof.chisquare(data[:, 1], probs, value=effect_size)
    power = gof.chisquare_power(effect_size, n_obs,
                                data.shape[0], alpha=alpha)

    chisquare_all = list(chisquare) + [n_obs, effect_size] \
                    + list(chi2) + [power]

    log.write_row(source.current.dir_base, source.current.base_names,
                  chisquare_all, sparams, converged, fit_criteria)
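Stripped of the project-specific plumbing, the core statsmodels pattern used above is: compute an effect size between observed counts and model-implied expected counts, run the chi-square test shifted by that effect size, and evaluate power. A minimal sketch with made-up counts, assuming `gof` is statsmodels.stats.gof:

import numpy as np
from statsmodels.stats import gof

# Hypothetical observed counts and a uniform expected distribution.
observed = np.array([230, 260, 250, 260])
expected = observed.sum() * np.array([0.25, 0.25, 0.25, 0.25])

effect_size = gof.chisquare_effectsize(observed, expected)
chi2_res = gof.chisquare(observed, expected, value=effect_size)
test_power = gof.chisquare_power(effect_size, observed.sum(), len(observed),
                                 alpha=0.05)
print(effect_size, list(chi2_res), test_power)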
Example #4
# Assumed context from the original script: `np` (numpy), `stats` (scipy.stats),
# and chisquare, chisquare_effectsize, chisquare_power from statsmodels.stats.gof,
# plus `freq`, `nobs`, `n_bins`, and the cell probabilities `probs`, `probs_d`
# (alternative), and `probs_cs` (cumulative probabilities used for sampling)
# defined earlier.
crit = stats.chi2.isf(0.05, n_bins - 1)
power = stats.ncx2.sf(crit, n_bins - 1, 0.001**2 * nobs)  # noncentrality = w**2 * N
#> library(pwr)
#> tr = pwr.chisq.test(w =0.001, N =30000 , df = 5-1, sig.level = 0.05, power = NULL)
assert_almost_equal(power, 0.05147563, decimal=7)
effect_size = 0.001
power = chisquare_power(effect_size, nobs, n_bins, alpha=0.05)
assert_almost_equal(power, 0.05147563, decimal=7)
print(chisquare(freq, nobs*probs, value=0, ddof=0))
d_null_alt = ((probs - probs_d)**2 / probs).sum()
print(chisquare(freq, nobs*probs, value=np.sqrt(d_null_alt), ddof=0))


# Monte Carlo check of the size and power of the tests

d_delta_r = chisquare_effectsize(probs, probs_d)
n_rep = 10000
nobs = 3000
res_boots = np.zeros((n_rep, 6))
for i in range(n_rep):
    # draw nobs categorical observations using the cumulative probabilities probs_cs
    rvs = np.argmax(np.random.rand(nobs, 1) < probs_cs, 1)
    freq = np.bincount(rvs, minlength=len(probs))  # minlength guards against empty trailing bins
    res1 = chisquare(freq, nobs * probs)
    res2 = chisquare(freq, nobs * probs_d)
    res3 = chisquare(freq, nobs * probs_d, value=d_delta_r)
    # store the three test statistics followed by the three p-values
    res_boots[i] = [res1[0], res2[0], res3[0], res1[1], res2[1], res3[1]]

alpha = np.array([0.01, 0.05, 0.1, 0.25, 0.5])
chi2_power = chisquare_power(d_delta_r, nobs, n_bins, alpha=alpha)
# empirical rejection rates at the 0.05 level for the three p-value columns
print((res_boots[:, 3:] < 0.05).mean(0))
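If the simulated counts follow the null probabilities `probs` (i.e., `probs_cs` is its cumulative sum), the first and third p-value columns should reject at roughly the nominal rate, while the second column's rejection rate should track the analytical power. A hedged follow-up comparison across all alpha levels, reusing the variables above:

# Illustrative check only; assumes the samples were drawn under `probs`.
print(chi2_power)                           # analytical power at each alpha
print((res_boots[:, 4:5] < alpha).mean(0))  # empirical power of the second test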