def pf(q, df1, df2, ncp=0):
    """Calculates the cumulative distribution function (CDF) of the F-distribution
    (noncentral when ncp != 0)."""
    from scipy.stats import f, ncf
    if ncp == 0:
        result = f.cdf(x=q, dfn=df1, dfd=df2, loc=0, scale=1)
    else:
        result = ncf.cdf(x=q, dfn=df1, dfd=df2, nc=ncp, loc=0, scale=1)
    return result
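# A quick check of pf (the numbers below are illustrative, not from the source):
# with ncp=0 it falls back to the central F CDF, otherwise it uses the noncentral F.
print(pf(3.84, df1=1, df2=100))             # central F CDF at q = 3.84
print(pf(3.84, df1=1, df2=100, ncp=5.0))    # noncentral F CDF with ncp = 5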
import numpy as np
from scipy.stats import f, ncf


def anova2_test_power(arms, alpha, mus, n, sigma):
    # Overall F-test power for a design with arms * 2 cells and n observations per cell.
    n_cell = arms * 2
    ndf = n_cell - 1
    ddf = n * n_cell - ndf - 1
    # Noncentrality parameter: spread of the means around the grand mean, scaled by n / sigma^2.
    nu = n * np.sum((mus - np.mean(mus))**2) / sigma / sigma
    f_cut = f.ppf(1 - alpha, ndf, ddf)
    power = 1 - ncf.cdf(f_cut, ndf, ddf, nu)
    return power
def anovaf_test_power(arms, alpha, mus, n, sigma):
    # Power of the overall ANOVA F-test with arms - 1 numerator and
    # (arms - 1) * n denominator degrees of freedom.
    ndf = arms - 1
    ddf = (arms - 1) * n
    nu = n * np.sum((mus - np.mean(mus))**2) / sigma / sigma
    f_cut = f.ppf(1 - alpha, ndf, ddf)
    power = 1 - ncf.cdf(f_cut, ndf, ddf, nu)
    return power
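# Illustrative calls of the two power helpers above (the means, n, and sigma are
# made-up example values, not from the source; mus for anova2_test_power is assumed
# to hold one mean per cell):
cell_means = np.array([0.0, 0.2, 0.4, 0.0, 0.3, 0.6])   # arms * 2 = 6 cells
group_means = np.array([0.0, 0.3, 0.6])                  # arms = 3 groups
print(anova2_test_power(arms=3, alpha=0.05, mus=cell_means, n=20, sigma=1.0))
print(anovaf_test_power(arms=3, alpha=0.05, mus=group_means, n=20, sigma=1.0))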
import numpy as np
from patsy import dmatrix
from scipy.stats import f, ncf


def f_power(model, design, effect_size, alpha):
    """Calculates the power of an F test.

    This calculates the probability that the F-statistic is above its
    critical value (alpha) given an effect of some size.

    :param model: A patsy formula for which to calculate power.
    :type model: patsy.formula
    :param design: A pandas.DataFrame representing a design.
    :type design: pandas.DataFrame
    :param effect_size: The size of the effect that the test should be able
                        to detect (also called a signal-to-noise ratio).
    :type effect_size: float
    :param alpha: The critical value that we want the test to be above.
    :type alpha: float between 0 and 1
    :returns: A list of percentage probabilities that an F-test could detect
              an effect of the given size at the given alpha value for a
              particular column.

    Usage:
      >>> design = dexpy.factorial.build_factorial(4, 8)
      >>> print(dexpy.power.f_power("1 + A + B + C + D", design, 2.0, 0.05))
      [ 95.016, 49.003, 49.003, 49.003, 49.003 ]
    """
    X = dmatrix(model, design)
    residual_df = X.shape[0] - X.shape[1]
    XtXi = np.linalg.inv(np.dot(np.transpose(X), X))
    non_centrality = 1 / np.diag(XtXi)

    # pre-calculate crit value for 1 df, most common case
    crit_value = f.ppf(1 - alpha, 1, residual_df)

    power = []
    for t in range(0, X.shape[1]):
        # adjust_non_centrality is a helper defined elsewhere in dexpy (not shown here)
        nc = adjust_non_centrality(non_centrality[t], X[:, t])
        nc *= effect_size * effect_size / 4.0
        p = 1 - ncf.cdf(crit_value, 1, residual_df, nc)
        power.append(p)

    return power
# Requires numpy as np and scipy.stats' ncf and ncx2 at module level.
def _compute_ppvals_33(self, pcurvetype="full"):
    family = self._df_results["family"].values
    df1, df2, stat, pvals, ncp33 = self._df_results[[
        "df1", "df2", "stat", "p", "ncp33"
    ]].to_numpy().T

    if pcurvetype == "full":
        pthresh = .05    # Only keep p-values smaller than .05
        propsig = 1 / 3  # Under 33% power, 1/3 of p-values should be lower than .05
    else:
        pthresh = .025   # Only keep p-values smaller than .025
        # We don't know which proportion of p-values should be smaller than .025
        # under 33% power, so compute it.
        propsig = 3 * self._compute_prop_lower_33(.025, family, df1, df2,
                                                  pvals, ncp33)

    # We then stretch the ppval on the [0, 1] interval.
    pp_33_f = (1 / propsig) * (ncf.cdf(stat, df1, df2, ncp33) - (1 - propsig))
    pp_33_chi = (1 / propsig) * (ncx2.cdf(stat, df1, ncp33) - (1 - propsig))
    pp_33 = np.where(family == "F", pp_33_f, pp_33_chi)

    return np.array([
        self._bound_pvals(pp) if p < pthresh else np.nan
        for (p, pp) in zip(pvals, pp_33)
    ])
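# Stand-alone sketch of the full-curve case above (the df1, df2, F and ncp33 numbers
# here are made-up illustrations, not taken from the source): under 33% power a third
# of p-values fall below .05, so the conditional pp-value rescales ncf.cdf onto [0, 1].
import numpy as np
from scipy.stats import ncf

df1, df2, F_stat, ncp33 = 1.0, 38.0, 5.21, 4.3
propsig = 1 / 3
pp_33 = (1 / propsig) * (ncf.cdf(F_stat, df1, df2, ncp33) - (1 - propsig))
print(pp_33)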
plt.annotate("beta= "+str(np.round(betacalc,3)),(mean_h0-stderr,.1*ymax)) plt.annotate("n= "+str(n),(alpha_xval+stderr/2,.2*ymax)) plt.legend(loc=1) plt.xlabel("Effect Size") plt.ylabel("Probability") plt.title("Sample Size and Power Calculation") plt.show() #optimize beta to desired value print("Power of test as entered (n={})= {:.2f}%".format(n,100*(1-betacalc))) print("[nct power= {:.2f}%]".format((1-betacalc)*100)) #compute F dist power : fcrit=f.ppf(1-alpha*sided,1,n-1) dsr=mean_h1/sigma betacalc_f=ncf.cdf(fcrit,1,n-1,n*dsr**2) print("[fct power= {:.2f}%]".format((1-betacalc_f)*100)) def opt_n(n): stderr_n=sigma/n**.5 lam_n=(mean_h1-mean_h0)/stderr_n alpha_xval_n=t.isf(alphas,df=n-1,scale=stderr_n) betah_n=nct.cdf(alpha_xval_n,df=n-1,nc=lam_n,scale=stderr_n) if sided==2: betal_n=nct.cdf(-alpha_xval_n,df=n-1,nc=lam_n) betacalc_n=betah_n-betal_n else: betacalc_n=betah_n return betacalc_n
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ncf

# Example shape parameters and a fresh axis (assumed setup; the snippet starts
# mid-example and does not define dfn, dfd, nc or ax itself):
fig, ax = plt.subplots(1, 1)
dfn, dfd, nc = 27, 27, 0.416

# Display the probability density function (``pdf``):
x = np.linspace(ncf.ppf(0.01, dfn, dfd, nc),
                ncf.ppf(0.99, dfn, dfd, nc), 100)
ax.plot(x, ncf.pdf(x, dfn, dfd, nc), 'r-', lw=5, alpha=0.6, label='ncf pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = ncf(dfn, dfd, nc)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = ncf.ppf([0.001, 0.5, 0.999], dfn, dfd, nc)
np.allclose([0.001, 0.5, 0.999], ncf.cdf(vals, dfn, dfd, nc))  # True

# Generate random numbers:
r = ncf.rvs(dfn, dfd, nc, size=1000)

# And compare the histogram (``density`` replaces matplotlib's removed ``normed`` argument):
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()