def generate_t_distribution_chart_data(alpha, t_stat, n_1, n_2, x_bar_1, x_bar_2, s_1, s_2): welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2) d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2) alpha_upper = t.ppf(1 - alpha/2, df=welches_df) alpha_lower = -alpha_upper # Determine X axis range x_min = -5 x_max = 5 x_axis = list(np.arange(x_min, x_max, (x_max - x_min) / 501)) x_axis_values = ["{:.3f}".format(x) for x in x_axis] H0 = [] significant = [] for value in x_axis: H0.append(t.pdf(value, df=welches_df)) if alpha_lower <= value <= alpha_upper: significant.append(None) else: significant.append(t.pdf(value, df=welches_df)) return { "title": "t-statistic distribution (effect size: {:.3f})".format(d), "xAxisLabel": "t", "yAxisLabel": "Density", "labels": x_axis_values, "verticalLine": { "position": "{:.3f}".format(utils.find_closest_value(x_axis, t_stat)), "label": "t statistic: {:.3f}".format(t_stat) }, "dataset": [ { "label": "H0", "data": H0, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": None }, { "label": "H0", "data": significant, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] } ] }
def generate_p_value_vs_effect_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha): d_raw = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2) d_adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2) d_results = tt.calculate_min_effect_size(n_1=n_1, n_2=n_2, alpha=alpha, power=0.5) d_target = d_results[1][0] / d_adjustment ff = 0.5 if d_raw < 0: x_min = min(-d_target, d_raw) * (1 + ff) x_max = max(-d_target, d_raw) * (1 - ff) else: x_min = min(d_target, d_raw) * (1 - ff) x_max = max(d_target, d_raw) * (1 + ff) # Rounding to ensure matches step = (x_max - x_min) / 500 dps = utils.determine_decimal_points(x_max) effect_sizes = [round(x, dps) for x in np.arange(x_min, x_max, step)] d_actual = utils.find_closest_value(effect_sizes, d_raw) d_target = utils.find_closest_value(effect_sizes, d_target) os_lower = [] ts_lower = [] os_higher = [] ts_higher = [] for d in effect_sizes: welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2) t_stat = tt.calculate_t_stat_from_cohens_d(d, n_1, n_2) * d_adjustment results = tt.calculate_p_value(t_stat, welches_df) if d <= d_target: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) else: os_lower.append(None) ts_lower.append(None) if d >= d_target: os_higher.append(results[0][0]) ts_higher.append(results[1][0]) else: os_higher.append(None) ts_higher.append(None) # Determine X axis range format_string = "{:." + str(dps) + "f}" x_axis_values = [format_string.format(x) for x in list(effect_sizes)] return { "title": "p-value vs Effect Size (sample size: {}, enrolment ratio: {:.3f})".format(n_1 + n_2, n_1/n_2), "xAxisLabel": "Effect Size (d)", "yAxisLabel": "p-value", "labels": x_axis_values, "verticalLine": { "position": format_string.format(d_actual), "label": "Effect Size: " + format_string.format(d_raw) }, "dataset": [ { "label": "One Sided Test", "data": os_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "One Sided Test", "data": os_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, { "label": "Two Sided Test", "data": ts_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Two Sided Test", "data": ts_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] } ] }
def generate_p_value_vs_sample_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha): d_actual = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2) d_adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2) n_raw = n_1 + n_2 r_e = n_1 / n_2 n_results = tt.calculate_sample_size_from_means(mu_1=x_bar_1, mu_2=x_bar_2, sigma_1=s_1, sigma_2=s_2, alpha=alpha, power=0.5, enrolment_ratio=r_e) n_target = n_results[0][1] + n_results[1][1] ff = 0.1 x_min = int(max(4, min(n_raw * (1 - ff), n_target * (1 - ff)))) x_max = int(max(n_raw * (1 + ff), n_target * (1 + ff))) step = int(max(1, (x_max - x_min) / 500)) sample_sizes = np.arange(x_min, x_max, step) n_actual = utils.find_closest_value(sample_sizes, n_raw) n_target = utils.find_closest_value(sample_sizes, n_target) os_lower = [] ts_lower = [] os_higher = [] ts_higher = [] for n in sample_sizes: cn_1 = math.ceil(n * r_e / (1 + r_e)) cn_2 = math.ceil(n - cn_1) welches_df = utils.welches_degrees_of_freedom(s_1, cn_1, s_2, cn_2) d = utils.calculate_cohens_d(x_bar_1, s_1, cn_1, x_bar_2, s_2, cn_2) t_stat = tt.calculate_t_stat_from_cohens_d(d, cn_1, cn_2) * d_adjustment results = tt.calculate_p_value(t_stat, welches_df) if n <= n_target: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) else: os_lower.append(None) ts_lower.append(None) if n >= n_target: os_higher.append(results[0][0]) ts_higher.append(results[1][0]) else: os_higher.append(None) ts_higher.append(None) # Determine X axis range x_axis_values = [str(x) for x in list(sample_sizes)] return { "title": "Sample Size vs p-value (effect size: {:.3f}, enrolment ratio: {:.3f})".format(d_actual, n_1/n_2), "xAxisLabel": "Total Samples", "yAxisLabel": "p-value", "labels": x_axis_values, "verticalLine": { "position": str(n_actual), "label": "Sample Size: {}".format(n_raw) }, "dataset": [ { "label": "One Sided Test", "data": os_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "One Sided Test", "data": os_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, { "label": "Two Sided Test", "data": ts_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Two Sided Test", "data": ts_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] } ] }
def generate_sampling_distributions_chart_data(mu_1, mu_2, sigma_1, sigma_2, n_1, n_2, alpha): n = n_1 + n_2 - 2 df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2) H0_mean = 0 HA_mean = mu_2 - mu_1 sd_pooled = utils.calculate_pooled_standard_deviation(n_1, n_2, sigma_1, sigma_2) se = sd_pooled * (1/n_1 + 1/n_2)**0.5 d = utils.calculate_cohens_d(mu_1=mu_1, sigma_1=sigma_1, n_1=n_1, mu_2=mu_2, sigma_2=sigma_2, n_2=n_2) nc = d * (2 / (1/n_1 + 1/n_2) / 2)**0.5 if mu_1 > mu_2: nc *= -1 # Determine X axis range x_min = min(H0_mean, nc) - 4 x_max = max(H0_mean, nc) + 4 x_axis_values = list(np.linspace(start=x_min, stop=x_max, num=1000, endpoint=True)) alpha_lower = t.ppf(q=alpha/2, df=df) alpha_upper = -1 * alpha_lower # Insert key values for val in [H0_mean, nc, alpha_lower, alpha_upper]: if val not in x_axis_values: bisect.insort(x_axis_values, val) H0_significant = [] H0_not_significant = [] HA_powered = [] HA_unpowered = [] threshold = alpha_upper if HA_mean >= H0_mean else alpha_lower nct_dist = utils.initialize_nct_distribution(df=df, nc=nc) for value in x_axis_values: # Null Hypothesis H0_not_significant.append(t.pdf(x=value, df=df)) if value < alpha_lower or value > alpha_upper: H0_significant.append(t.pdf(x=value, df=df)) else: H0_significant.append(None) # Alternative Hypothesis HA_powered.append(nct_dist.pdf(x=value)) if HA_mean < H0_mean and value > alpha_lower: HA_unpowered.append(nct_dist.pdf(x=value)) elif HA_mean >= H0_mean and value < alpha_upper: HA_unpowered.append(nct_dist.pdf(x=value)) else: HA_unpowered.append(None) if HA_mean < H0_mean: power = nct_dist.cdf(x=alpha_lower) threshold = alpha_lower else: power = 1 - nct_dist.cdf(x=alpha_upper) threshold = alpha_upper decimal_points = utils.determine_decimal_points(x_max) format_string = "{:." + str(decimal_points) + "f}" return { "title": "Central and Noncentral Distributions (effect size: {:0.3f}, α: {:0.3f}, power (1 - β): {:.1%})".format(d, alpha, power), "xAxisLabel": "t statistic", "yAxisLabel": "Density", "labels": [format_string.format(x) for x in x_axis_values], "verticalLine": { "position": format_string.format(utils.find_closest_value(x_axis_values, threshold)), "label": "t crit: " + format_string.format(threshold) }, "hidePoints": True, "dataset": [ { "label": "H0 - Significant", "data": H0_significant, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "H0", "data": H0_not_significant, "borderColor": colors.line_colors[0], "backgroundColor": None }, { "label": "HA - Powered", "data": HA_powered, "borderColor": colors.line_colors[1], "backgroundColor": None }, { "label": "HA", "data": HA_unpowered, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] } ] }
def generate_power_vs_effect_size_data(d, alpha, power, n_1, n_2): power_list = list(np.arange(0.4, 1, 0.001)) power_target = utils.find_closest_value(power_list, power) os_lower = [] ts_lower = [] os_upper = [] ts_upper = [] for pow in power_list: results = tt.calculate_min_effect_size(n_1=n_1, n_2=n_2, alpha=alpha, power=pow) if pow < power_target: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_upper.append(None) ts_upper.append(None) elif pow > power_target: os_lower.append(None) ts_lower.append(None) os_upper.append(results[0][0]) ts_upper.append(results[1][0]) elif pow == power_target: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_upper.append(results[0][0]) ts_upper.append(results[1][0]) return { "title": "Power vs Effect Size (α: {:0.3f}, total samples: {})".format(alpha, n_1 + n_2), "xAxisLabel": "Power (1 - β)", "yAxisLabel": "Effect Size (d)", "labels": ["{:.3f}".format(x) for x in power_list], "verticalLine": { "position": "{:.3f}".format(power_target), "label": "Statistical Power: {:.3f}".format(power) }, "dataset": [ { "label": "One Sided Test", "data": os_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "One Sided Test", "data": os_upper, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, { "label": "Two Sided Test", "data": ts_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Two Sided Test", "data": ts_upper, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] } ] }
def generate_sample_size_vs_effect_size_data(d, alpha, power, n_1, n_2): enrolment_ratio = n_1/n_2 n_raw = n_1 + n_2 ff = 0.5 x_min = max(4, int(n_raw * (1 - ff))) x_max = max(int(n_raw * (1 + ff)), 100) step = int(max(1, (x_max - x_min) / 500)) sample_sizes = np.arange(x_min, x_max, step) n_actual = utils.find_closest_value(sample_sizes, n_raw) os_lower = [] ts_lower = [] os_upper = [] ts_upper = [] for n in sample_sizes: cn_1 = math.ceil(n * enrolment_ratio / (1 + enrolment_ratio)) cn_2 = math.ceil(n - cn_1) results = tt.calculate_min_effect_size(n_1=cn_1, n_2=cn_2, alpha=alpha, power=power) if n < n_actual: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_upper.append(None) ts_upper.append(None) elif n > n_actual: os_lower.append(None) ts_lower.append(None) os_upper.append(results[0][0]) ts_upper.append(results[1][0]) elif n == n_actual: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_upper.append(results[0][0]) ts_upper.append(results[1][0]) return { "title": "Sample Size vs Effect Size (α: {:0.3f}, power (1 - β): {:.1%})".format(alpha, power), "xAxisLabel": "Sample Size", "yAxisLabel": "Effect Size (d)", "labels": [str(x) for x in list(sample_sizes)], "verticalLine": { "position": str(n_actual), "label": "Sample Size: {}".format(n_raw) }, "dataset": [ { "label": "One Sided Test", "data": os_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "One Sided Test", "data": os_upper, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, { "label": "Two Sided Test", "data": ts_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Two Sided Test", "data": ts_upper, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] } ] }
def generate_effect_size_vs_power_chart_data(d, alpha, n_1, n_2): ff = 0.1 d_powered = tt.calculate_min_effect_size(n_1=n_1, n_2=n_2, alpha=alpha, power=0.8) if d < 0: x_min = min(-d_powered[1][0], d) * (1 + ff) x_max = max(-0.01, d * (1 + ff)) else: x_min = min(0.01, d * (1 - ff)) x_max = max(d_powered[1][0], d) * (1 + ff) step = (x_max - x_min) / 500 dps = utils.determine_decimal_points(x_max) effect_sizes = [round(x, dps) for x in np.arange(x_min, x_max, step)] d_actual = utils.find_closest_value(effect_sizes, d) os_lower = [] ts_lower = [] os_higher = [] ts_higher = [] for es in effect_sizes: results = tt.calculate_power_from_cohens_d(d=es, n_1=n_1, n_2=n_2, alpha=alpha) if (es < d_actual and d_actual > 0) or (es > d_actual and d_actual < 0): os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_higher.append(None) ts_higher.append(None) elif (es < d_actual and d_actual < 0) or (es > d_actual and d_actual > 0): os_lower.append(None) ts_lower.append(None) os_higher.append(results[0][0]) ts_higher.append(results[1][0]) elif es == d_actual: os_lower.append(results[0][0]) ts_lower.append(results[1][0]) os_higher.append(results[0][0]) ts_higher.append(results[1][0]) format_string = "{:." + str(dps) + "f}" chart_data = { "title": "Sample Size vs Effect Size (α: {:.3f}, total samples: {:,})".format(alpha, n_1 + n_2), "xAxisLabel": "Effect Size (d)", "yAxisLabel": "Statistical Power (1 - β)", "labels": [format_string.format(es) for es in effect_sizes], "verticalLine": { "position": format_string.format(d_actual), "label": "Effect Size: " + format_string.format(d) }, "dataset": [ { "label": "Lower Powers – One Sided Test", "data": os_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Higher Powers – One Sided Test", "data": os_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, { "label": "Lower Powers – Two Sided Test", "data": ts_lower, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[0], "backgroundColor": colors.background_colors[0] }, { "label": "Higher Powers – Two Sided Test", "data": ts_higher, "pointBorderWidth": 0, "pointRadius": 0.5, "borderColor": colors.line_colors[1], "backgroundColor": colors.background_colors[1] }, ] } return chart_data