def calculate_power_from_cohens_d(d, n_1, n_2, alpha):
    """Compute one- and two-sided power of a two-sample t-test from Cohen's d.

    Power is taken from the distribution object returned by
    ``utils.initialize_nct_distribution`` (a noncentral t distribution),
    using ``n_1 + n_2 - 2`` degrees of freedom and a noncentrality parameter
    of ``|d| * sqrt(n_h / 2)``, where ``n_h`` is the harmonic mean of the two
    sample sizes.

    Args:
        d: Effect size (Cohen's d). Only its magnitude is used.
        n_1: Size of the first sample.
        n_2: Size of the second sample.
        alpha: Significance level.

    Returns:
        A nested list ``[[power_one_sided], [power_two_sided]]``.
    """
    df = n_1 + n_2 - 2
    t_crit_os = t.ppf(q=1 - alpha, df=df)
    t_crit_ts = t.ppf(q=1 - alpha / 2, df=df)

    # Noncentrality parameter: |d| scaled by sqrt(harmonic mean of n / 2).
    harmonic_n = 2 / (1 / n_1 + 1 / n_2)
    nc = abs(d) * (harmonic_n / 2) ** 0.5
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)

    # Power is the mass above the critical value. The two-sided figure only
    # counts the upper rejection region (the far tail is not added).
    power_os = 1 - nct_dist.cdf(x=t_crit_os)
    power_ts = 1 - nct_dist.cdf(x=t_crit_ts)
    return [[power_os], [power_ts]]
def calculate_power_from_means(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Compute one- and two-sided power of a two-sample t-test from summary stats.

    Degrees of freedom come from ``utils.welches_degrees_of_freedom`` and the
    effect size from ``utils.calculate_cohens_d``. The magnitude of the effect
    size is used for the noncentrality parameter, in line with
    ``calculate_power_from_cohens_d``, so the result does not depend on which
    group has the larger mean.

    Args:
        mu_1: Mean of the first group.
        sigma_1: Standard deviation of the first group.
        n_1: Size of the first sample.
        mu_2: Mean of the second group.
        sigma_2: Standard deviation of the second group.
        n_2: Size of the second sample.
        alpha: Significance level.

    Returns:
        A nested list ``[[power_one_sided], [power_two_sided]]``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    t_crit_os = t.ppf(q=1 - alpha, df=df)
    t_crit_ts = t.ppf(q=1 - alpha / 2, df=df)

    # Noncentrality parameter: |d| scaled by sqrt(harmonic mean of n / 2).
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    harmonic_n = 2 / (1 / n_1 + 1 / n_2)
    nc = abs(d) * (harmonic_n / 2) ** 0.5
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)

    # Power is the mass above the critical value. The two-sided figure only
    # counts the upper rejection region (the far tail is not added).
    power_os = 1 - nct_dist.cdf(x=t_crit_os)
    power_ts = 1 - nct_dist.cdf(x=t_crit_ts)
    return [[power_os], [power_ts]]
def create_power_from_d_formula(d, n_1, n_2, alpha):
    """Build the LaTeX steps that show how two-sided power is derived from d.

    The templates are raw strings so that LaTeX backslashes (including the
    explicit-space command, a backslash followed by a space) are stored
    literally instead of relying on Python's handling of unknown escapes.

    Args:
        d: Effect size (Cohen's d). Only its magnitude is used.
        n_1: Size of the first sample.
        n_2: Size of the second sample.
        alpha: Significance level.

    Returns:
        A list of four LaTeX strings: the critical t value, the definition of
        beta, the evaluated beta, and the resulting power ``1 - beta``.
    """
    formulae = []

    df = n_1 + n_2 - 2
    sig = 1 - alpha / 2
    t_crit = t.ppf(q=sig, df=df)
    step_1 = r"t_{{crit}} = t_{{1-\alpha/2, \ \upsilon}} = t_{{{:.3f}, \ {}}} = {:.3f}"
    formulae.append(step_1.format(sig, df, t_crit))

    step_2 = r"\beta = P(T <= t_{{crit}})\ where\ T\ \sim\ t_{{\upsilon={},\ \mu={:.3f}}}"
    # Noncentrality parameter: |d| scaled by sqrt(harmonic mean of n / 2).
    nc = abs(d) * (2 / (1 / n_1 + 1 / n_2) / 2) ** 0.5
    formulae.append(step_2.format(df, nc))

    # beta is the mass of the alternative distribution below the critical value.
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    beta = nct_dist.cdf(x=t_crit)
    step_3 = r"\beta = P(T <= {:.3f}) = {:.3f}"
    formulae.append(step_3.format(t_crit, beta))

    step_4 = r"1 - \beta = 1 - {:.3f} = {:.3f}"
    formulae.append(step_4.format(beta, 1 - beta))

    return formulae
def create_power_from_means_formula(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Build the LaTeX steps that show how two-sided power is derived from means.

    The templates are raw strings so that LaTeX backslashes (including the
    explicit-space command, a backslash followed by a space) are stored
    literally instead of relying on Python's handling of unknown escapes.

    Args:
        mu_1: Mean of the first group.
        sigma_1: Standard deviation of the first group.
        n_1: Size of the first sample.
        mu_2: Mean of the second group.
        sigma_2: Standard deviation of the second group.
        n_2: Size of the second sample.
        alpha: Significance level.

    Returns:
        A list of four LaTeX strings: the critical t value, the definition of
        beta, the evaluated beta, and the resulting power ``1 - beta``.
    """
    formulae = []

    # The degrees of freedom from the helper are truncated to an integer and
    # that truncated value is used for both the display and the calculation.
    df = int(utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2))
    sig = 1 - alpha / 2
    t_crit = t.ppf(q=sig, df=df)
    step_1 = r"t_{{crit}} = t_{{1-\alpha/2, \ \upsilon}} = t_{{{:.3f}, \ {}}} = {:.3f}"
    formulae.append(step_1.format(sig, df, t_crit))

    step_2 = r"\beta = P(T <= t_{{crit}})\ where\ T\ \sim\ t_{{\upsilon={},\ \mu={:.3f}}}"
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    # Noncentrality parameter: |d| scaled by sqrt(harmonic mean of n / 2).
    nc = abs(d) * (2 / (1 / n_1 + 1 / n_2) / 2) ** 0.5
    formulae.append(step_2.format(df, nc))

    # beta is the mass of the alternative distribution below the critical value.
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    beta = nct_dist.cdf(x=t_crit)
    step_3 = r"\beta = P(T <= {:.3f}) = {:.3f}"
    formulae.append(step_3.format(t_crit, beta))

    step_4 = r"1 - \beta = 1 - {:.3f} = {:.3f}"
    formulae.append(step_4.format(beta, 1 - beta))

    return formulae
def generate_sampling_distributions_chart_data(mu_1, mu_2, sigma_1, sigma_2, n_1, n_2, alpha):
    """Build chart data comparing the central (H0) and noncentral (HA) t curves.

    The x axis is a grid of 1000 points spanning 4 units either side of the
    two distribution centres (0 and the noncentrality parameter), with the
    centres and both two-sided critical values inserted so that they appear
    on the axis.

    Args:
        mu_1: Mean of the first group.
        mu_2: Mean of the second group.
        sigma_1: Standard deviation of the first group.
        sigma_2: Standard deviation of the second group.
        n_1: Size of the first sample.
        n_2: Size of the second sample.
        alpha: Significance level (split across both tails).

    Returns:
        A dict with the chart title, axis labels, formatted x labels, a
        vertical-line marker at the critical value on the side of the effect,
        and four datasets: the H0 rejection regions, the full H0 curve, the
        full HA curve, and the part of the HA curve that falls short of the
        critical value. Points outside a dataset's region are ``None``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    H0_mean = 0
    HA_mean = mu_2 - mu_1

    d = utils.calculate_cohens_d(mu_1=mu_1, sigma_1=sigma_1, n_1=n_1,
                                 mu_2=mu_2, sigma_2=sigma_2, n_2=n_2)
    nc = d * (2 / (1 / n_1 + 1 / n_2) / 2) ** 0.5
    # Flip the noncentrality when group 1 has the larger mean so the HA curve
    # sits on the same side of zero as HA_mean.
    # NOTE(review): this presumes calculate_cohens_d returns a non-negative
    # value - confirm against the helper.
    if mu_1 > mu_2:
        nc *= -1

    # Determine X axis range
    x_min = min(H0_mean, nc) - 4
    x_max = max(H0_mean, nc) + 4
    x_axis_values = list(np.linspace(start=x_min, stop=x_max, num=1000, endpoint=True))

    alpha_lower = t.ppf(q=alpha / 2, df=df)
    alpha_upper = -1 * alpha_lower

    # Insert key values so they land exactly on the axis.
    for val in [H0_mean, nc, alpha_lower, alpha_upper]:
        if val not in x_axis_values:
            bisect.insort(x_axis_values, val)

    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    # The direction of the effect is fixed for the whole chart; decide it once.
    effect_is_negative = HA_mean < H0_mean

    H0_significant = []
    H0_not_significant = []
    HA_powered = []
    HA_unpowered = []

    for value in x_axis_values:
        # Null hypothesis: full curve, plus only the two rejection tails.
        h0_density = t.pdf(x=value, df=df)
        H0_not_significant.append(h0_density)
        in_rejection_region = value < alpha_lower or value > alpha_upper
        H0_significant.append(h0_density if in_rejection_region else None)

        # Alternative hypothesis: full curve, plus the part that does not
        # reach the critical value on the side of the effect.
        ha_density = nct_dist.pdf(x=value)
        HA_powered.append(ha_density)
        if effect_is_negative:
            falls_short = value > alpha_lower
        else:
            falls_short = value < alpha_upper
        HA_unpowered.append(ha_density if falls_short else None)

    if effect_is_negative:
        power = nct_dist.cdf(x=alpha_lower)
        threshold = alpha_lower
    else:
        power = 1 - nct_dist.cdf(x=alpha_upper)
        threshold = alpha_upper

    # Number of decimals for axis labels is chosen by the helper from x_max.
    decimal_points = utils.determine_decimal_points(x_max)
    format_string = "{:." + str(decimal_points) + "f}"

    return {
        "title": "Central and Noncentral Distributions (effect size: {:0.3f}, α: {:0.3f}, power (1 - β): {:.1%})".format(d, alpha, power),
        "xAxisLabel": "t statistic",
        "yAxisLabel": "Density",
        "labels": [format_string.format(x) for x in x_axis_values],
        "verticalLine": {
            "position": format_string.format(utils.find_closest_value(x_axis_values, threshold)),
            "label": "t crit: " + format_string.format(threshold)
        },
        "hidePoints": True,
        "dataset": [
            {
                "label": "H0 - Significant",
                "data": H0_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            },
            {
                "label": "H0",
                "data": H0_not_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": None
            },
            {
                "label": "HA - Powered",
                "data": HA_powered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": None
            },
            {
                "label": "HA",
                "data": HA_unpowered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": colors.background_colors[1]
            }
        ]
    }