# result in an error if LaTeX is not installed on your system.  In that case,
# you can set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Distribution parameters: the i-th entries of d1_values, d2_values and
# linestyles together describe one curve.  mu is forwarded unchanged as the
# third positional argument of fisher_f.
mu = 0
d1_values = [1, 5, 2, 10]
d2_values = [1, 2, 5, 50]
linestyles = ['-', '--', ':', '-.']

# Evaluation grid on (0, 5]; the leading sample x = 0 is dropped.
x = np.linspace(0, 5, 1001)[1:]

fig, ax = plt.subplots(figsize=(5, 3.75))

# Draw one black pdf curve per (d1, d2) pair, each with its own line style.
for d1, d2, ls in zip(d1_values, d2_values, linestyles):
    dist = fisher_f(d1, d2, mu)
    curve_label = r'$d_1=%i,\ d_2=%i$' % (d1, d2)
    plt.plot(x, dist.pdf(x), ls=ls, c='black', label=curve_label)

# Visible window of the plot.
plt.xlim(0, 4)
plt.ylim(0.0, 1.0)

# Axis annotations and title.
plt.xlabel('$x$')
plt.ylabel(r'$p(x|d_1, d_2)$')
plt.title("Fisher's Distribution")

plt.legend()
plt.show()
lhs_areas.append(area)  # NOTE(review): presumably the tail of a loop that starts outside this excerpt

# Perform T-test (equal_var=False: the two samples are not assumed to share
# a common variance)
test = ttest_ind(random_areas, lhs_areas, equal_var=False, axis=0)
print("value of T-test: " + str(test))

# Interval on which to generate pdf.  The leading sample x = 0 is dropped.
x = np.linspace(0, 15, 1001)[1:]

# Generate distribution and actual value and plot
mu = 0
ls = '-'

# Unbiased sample variances (ddof=1): their ratio is the statistic that
# follows an F distribution with (n_num - 1, n_den - 1) degrees of freedom.
var_random = np.var(random_areas, ddof=1)
var_lhs = np.var(lhs_areas, ddof=1)

# The larger variance goes in the numerator; the degrees of freedom must
# follow the same ordering, and each is the sample size minus one.
if var_random >= var_lhs:
    actual = var_random / var_lhs
    dfn, dfd = len(random_areas) - 1, len(lhs_areas) - 1
else:
    actual = var_lhs / var_random
    dfn, dfd = len(lhs_areas) - 1, len(random_areas) - 1

# Make distribution
dist = fisher_f(dfn, dfd, mu)
plt.plot(x, dist.pdf(x), ls=ls, c='black', label='Fischer distribution')

# Riemann-sum estimate of the probability mass below x[749].  The step is
# the real grid spacing (15 / 1000), taken from the grid itself.
step = x[1] - x[0]
print(sum(dist.pdf(x[0:750]) * step))

# Mark the observed variance ratio on the plot.
plt.vlines(actual, ymin=-10, ymax=10, ls='-.', label='Actual value')

plt.xlim(0, 12)
plt.ylim(0.0, 10)

plt.xlabel('$x$')
plt.ylabel(r'$p(x|d_1, d_2)$')
plt.title("Fisher's Distribution")
# you can set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Distribution parameters: the i-th entries of d1_values, d2_values and
# linestyles together describe one curve.  mu is forwarded unchanged as the
# third positional argument of fisher_f.
mu = 0
d1_values = [1, 5, 2, 10]
d2_values = [1, 2, 5, 50]
linestyles = ['-', '--', ':', '-.']

# Evaluation grid on (0, 5]; the leading sample x = 0 is dropped.
x = np.linspace(0, 5, 1001)[1:]

fig, ax = plt.subplots(figsize=(5, 3.75))

# Draw one black pdf curve per (d1, d2) pair, each with its own line style.
for d1, d2, ls in zip(d1_values, d2_values, linestyles):
    dist = fisher_f(d1, d2, mu)
    curve_label = r'$d_1=%i,\ d_2=%i$' % (d1, d2)
    plt.plot(x, dist.pdf(x), ls=ls, c='black', label=curve_label)

# Visible window of the plot.
plt.xlim(0, 4)
plt.ylim(0.0, 1.0)

# Axis annotations and title.
plt.xlabel('$x$')
plt.ylabel(r'$p(x|d_1, d_2)$')
plt.title("Fisher's Distribution")

plt.legend()
# Task - 1 : Generating p-values
def my_map(prm):
    """Return the F statistic for one row passed in by ``data.apply``.

    The entries ``prm[1:1 + PPL * CAT]`` are taken as a vector ``v`` and the
    statistic is::

        (v' NUM v * (PPL * CAT - RANK1)) / (v' DENOM v * (RANK1 - RANK2))

    ``NUM``, ``DENOM``, ``PPL``, ``CAT``, ``RANK1`` and ``RANK2`` are
    module-level names defined outside this excerpt.
    NOTE(review): presumably NUM / DENOM are square matrices of size
    PPL * CAT -- confirm against their definitions.

    Returns the statistic, or the integer ``0`` when the statistic is
    falsy (i.e. equal to zero).  NaN is truthy and is returned unchanged.
    """
    LIST = prm[1:1 + PPL * CAT]
    NUM2 = np.linalg.multi_dot([LIST.T, NUM, LIST])
    DENOM2 = np.linalg.multi_dot([LIST.T, DENOM, LIST])
    val = (NUM2 * (PPL * CAT - RANK1)) / (DENOM2 * (RANK1 - RANK2))
    return val if val else 0


data['f_val'] = data.apply(my_map, axis=1)
# Upper-tail probability of each F statistic.  The survival function is
# used instead of ``1 - cdf`` because the subtraction loses precision when
# the p-value is very small.
data['p_val'] = fisher_f(diff1, diff2, 0).sf(data['f_val'])
# Sorted copy of the p-values with NaN entries removed.
p_vals = np.array(sorted(data['p_val']))
p_vals = p_vals[~np.isnan(p_vals)]


# In[7]:

# Task-2 : Generating Histogram of p-values
data['p_val'].hist()
plt.show()


# In[9]:

# Task-4 : Using FDR cut-off of 0.05 for shortlisting rows, and
FDR_cutoff = 0.05
# Boolean mask of rows whose p-value is below the cut-off.
shrt_ls_rows = data['p_val'] < FDR_cutoff