def order_assumptions(weights, alpha, return_p):
    """Check two Mann-Whitney U comparisons against a significance level.

    The first test compares ``weights[0][0]`` with ``weights[0][1]``; the
    second compares ``weights[0][1]`` with ``weights[1][1]``.  The assumption
    is considered to hold only when both p-values are below ``alpha``.

    Parameters
    ----------
    weights : nested sequence indexed as ``weights[i][j]``; each entry is a
        sample passed to ``mannwhitneyu``.
    alpha : significance threshold that both p-values must fall below.
    return_p : when truthy, the two p-values are returned as well.

    Returns
    -------
    bool, or ``(bool, numpy.ndarray)`` when ``return_p`` is truthy; the array
    holds the two p-values in the order the tests are described above.
    """
    # Right now only works for K = 2
    from scipy.stats import mannwhitneyu as mwu

    # Only the p-value (second element of each result) is needed.
    p_first = mwu(weights[0][0], weights[0][1])[1]
    p_second = mwu(weights[0][1], weights[1][1])[1]

    holds = bool(p_first < alpha and p_second < alpha)

    if not return_p:
        return holds
    return holds, np.array([p_first, p_second])
def MWU_vs_average_helper(data, groups, gene):
    """Compute one-vs-rest Mann-Whitney U p-values for a single gene.

    For every group label produced by ``return_unique(groups)``, the entries
    of ``data`` belonging to that group are tested against the entries of all
    remaining samples with a one-sided test (``alternative='greater'``).

    Parameters
    ----------
    data : pandas object supporting label selection through ``.loc``
        (presumably the values of ``gene`` per sample -- confirm with caller).
    groups : pandas Series of group labels.  Its index labels are used to
        select from ``data``, so both are assumed to share the same index.
    gene : label used as the single row index of the result.

    Returns
    -------
    pandas.DataFrame
        One row labelled ``gene`` and one column per group, holding the
        p-value of the group-vs-rest test, or ``1.0`` when the test raised.
    """
    # Evaluate the group labels once; they define both the columns and the loop.
    unique_groups = list(return_unique(groups))
    output = pd.DataFrame(index=[gene], columns=unique_groups)

    for gr in unique_groups:
        # ``.ix`` was removed from pandas; the keys here are index labels,
        # so ``.loc`` is the matching accessor.
        in_group = data.loc[groups[groups == gr].index]
        rest = data.loc[groups[groups != gr].index]
        try:
            pval = mwu(in_group, rest, alternative='greater')[1]
        except Exception:
            # Best effort: a test that cannot be computed is reported as
            # non-significant instead of aborting the whole run.
            pval = 1.0
        output.loc[gene, gr] = pval

    return output
def MWU_vs_groups_helper(data, groups, gene):
    """Compute worst-case pairwise Mann-Whitney U p-values for a single gene.

    Each group is tested (one-sided, ``alternative='greater'``) against every
    other group separately, and the largest of those p-values is kept for the
    group.

    Parameters
    ----------
    data : pandas object supporting label selection through ``.loc``
        (presumably the values of ``gene`` per sample -- confirm with caller).
    groups : pandas Series of group labels.  Its index labels are used to
        select from ``data``, so both are assumed to share the same index.
    gene : label used as the single row index of the result.

    Returns
    -------
    pandas.DataFrame
        Float frame with one row labelled ``gene`` and one column per group.

    Notes
    -----
    With a single group there is nothing to compare against and ``np.max``
    is called on an empty list, which raises ``ValueError``.
    """
    # Evaluate the group labels once instead of on every loop iteration.
    unique_groups = list(return_unique(groups))
    output = pd.DataFrame(index=[gene], columns=unique_groups)

    for gr1 in unique_groups:
        # ``.ix`` was removed from pandas; the keys here are index labels,
        # so ``.loc`` is the matching accessor.
        d1 = data.loc[groups[groups == gr1].index]
        pvals = []
        for gr2 in [other for other in unique_groups if other != gr1]:
            d2 = data.loc[groups[groups == gr2].index]
            try:
                pval_tmp = mwu(d1, d2, alternative='greater')[1]
            except Exception:
                # Best effort: a comparison that cannot be computed counts
                # as non-significant.
                pval_tmp = 1.0
            pvals.append(pval_tmp)
        # The group is only as significant as its weakest pairwise comparison.
        output.loc[gene, gr1] = np.max(pvals)

    return output.astype(float)
def test_apply_test_with_test(self):
    """apply_test with the "Mann-Whitney" library test matches a direct mwu call.

    The reference value is the two-sided p-value returned by ``mwu`` for
    ``self.x`` and ``self.y``.
    """
    mann_whitney = StatTest.from_library("Mann-Whitney")

    expected_pvalue = mwu(self.x, self.y, alternative="two-sided")[1]
    outcome = apply_test(self.x, self.y, mann_whitney)

    self.assertAlmostEqual(expected_pvalue, outcome.pvalue)
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, mannwhitneyu as mwu
import matplotlib.pyplot as plt

# Count table: only its column order is used below, to select and order rows of `s`.
v = pd.read_table(
    '/Users/nate/Projects/EBV_interactome/stad/stadmirs_counts211neg24pos.fordeseq.tsv',
    index_col=0,
)
# Comma-separated results table, indexed by its first column.
s = pd.read_table(
    '/Users/nate/Projects/EBV_interactome/stad/CIBERSORTx_Job190_Results.csv',
    index_col=0,
    sep=',',
)
s = s.loc[v.columns]

# One output row per column of `s`, leaving out the last four columns.
new = pd.DataFrame(index=s.columns[:-4])

# Rows from position 211 onward are compared against the first 211 rows
# (presumably the 24 positive vs. 211 negative samples named in the input
# file name -- confirm).
fcs, ps, means = [], [], []
for i in new.index:
    upper = s.iloc[211:][i]
    lower = s.iloc[:211][i]
    # 0.01 is added to both means before taking the ratio.
    fcs.append((np.mean(upper) + .01) / (np.mean(lower) + .01))
    ps.append(mwu(upper, lower, alternative='two-sided')[1])
    means.append(np.mean(s[i] + .01))

new['fc'] = fcs
new['ps'] = ps
new['mean'] = means
new = new.sort_values('fc')

# Dot plot: log2 ratio on x, one row per column of `s` on y, marker area
# scaled by the mean, red where the p-value is below 0.05.
fig, ax = plt.subplots(figsize=(6, 8))
row_positions = range(len(new.index))
dot_colors = ['r' if x < .05 else "0.75" for x in new['ps']]
plt.scatter(
    np.log2(new['fc']),
    row_positions,
    s=100 * new['mean'],
    c=dot_colors,
    alpha=0.7,
    lw=0,
)
ax.set_yticks(row_positions)
ax.set_yticklabels(new.index)
plt.xlim([-3.5, 3.5])
plt.tight_layout()
plt.savefig('/Users/nate/Projects/EBV_interactome/stad/stad_cib_ebv_posvneg.svg')
euc_dist = this_mean_dist, cos_dist = this_mean_cos_dist, index = [0])) dist_frame = pd.concat([dist_frame, this_dist_frame]) print(this_index) low_cov_nrns['file'] = low_cov_nrns.index.get_level_values('file') low_cov_nrns['neuron'] = low_cov_nrns.index.get_level_values('neuron') low_cov_nrns = pd.merge(low_cov_nrns,dist_frame) plt.figure();sns.swarmplot(x = 'stable',y='euc_dist',data=low_cov_nrns) plt.figure();sns.swarmplot(x = 'stable',y='cos_dist',data=low_cov_nrns) mwu(low_cov_nrns.query('stable == True').euc_dist, low_cov_nrns.query('stable == False').euc_dist) mwu(low_cov_nrns.query('stable == True').cos_dist, low_cov_nrns.query('stable == False').cos_dist) # ============================================================================= # ============================================================================= # Pull out and cluster distance matrices clust_post_dist = nrn_dist[trial_order,:] clust_post_dist = clust_post_dist[:,trial_order] ## Distance matrix cluster plots plt.figure() plt.subplot(221);plt.imshow(exposure.equalize_hist(nrn_dist));plt.title('Un Stim') plt.subplot(222);plt.imshow(exposure.equalize_hist(clust_post_dist));plt.title('Clust Stim') line_num = np.where(np.diff(np.sort(this_groups)))[0] for point in line_num: plt.axhline(point+0.5,color = 'red') plt.axvline(point+0.5,color = 'red')
zip(['step_size', 'window_size', 'total_time'], [25, 250, 7000])) data.get_data() data.get_firing_rates() for nrn in range(data.off_spikes[0].shape[0]): for taste in range(4): # Only take neurons which fire in every trial all_spikes = np.concatenate( (np.asarray(data.off_spikes), np.asarray(data.on_spikes)), axis=2) all_spikes = all_spikes[:, nrn, :, 2000:4000] if not (np.sum((np.sum(all_spikes, axis=2) == 0).flatten()) > 0): this_off = np.asarray(data.normal_off_firing) this_off = this_off[:, nrn, :, 80:160] this_off_mean = np.mean(this_off, axis=1) this_on = np.asarray(data.normal_on_firing) this_on = this_on[:, nrn, :, 80:160] this_on_mean = np.mean(this_on, axis=1) # Perform Mann-Whitney U-test on every timepoint alpha = 0.05 / this_off.shape[2] p_vals = np.empty((this_off.shape[0], this_off.shape[2])) for taste in range(4): for time in range(this_off.shape[2]): p_vals[taste, time] = mwu(this_off[taste, :, time], this_on[taste, :, time])[1] significant = np.sum(p_vals < alpha, axis=1) > 100 / 25 min_p_vals = np.min(p_vals, axis=1)
# Run a one-sided Mann-Whitney U test in each direction for every crisis column
for alternative in ("less", "greater"):
    for crisis in crises_df:
        # The country code column is not a crisis measure
        if crisis == "cc3":
            continue

        fra_sample = []
        gbr_sample = []

        # One summed value per country, filed under its colonist
        for cc in ccs:
            colonist = get_colonist(cc)
            if colonist == "FRA":
                bucket = fra_sample
            elif colonist == "GBR":
                bucket = gbr_sample
            else:
                continue
            bucket.append(crises_df[crisis].loc[crises_df["cc3"] == cc].sum())

        # State the hypotheses before reporting the result
        print(f"H0: {crisis} distribution is the same for former British "
              "and French colonies.")
        print(f"H1: {crisis} distribution is {alternative} for former "
              "British colonies compared to former French colonies.")
        print("Note: In our case a greater distribution would equal "
              "a less stable economy")
        print()

        # British sample first, so `alternative` describes GBR relative to FRA
        mwuresult = mwu(gbr_sample, fra_sample, alternative=alternative)
        print("U statistic:", mwuresult.statistic)
        print("P-value: ", mwuresult.pvalue)
        print()
def plot_boxes(xdata, ydata, labels, colors, ax = None):
    """
    Generate a two-sample box plot with the single data points overlaid
    and compare the samples with a two-sided Mann-Whitney U-test.

    Arguments:
    xdata   -- a list containing the first sample to plot
    ydata   -- a list containing the second sample to plot
    labels  -- a list of two strings naming the samples
    colors  -- a list of two strings with the color of each sample

    Returns:
    ax: the axis with the box plot, where the horizontal line is the
    median, boxes the first and third quartiles, and the whiskers the most
    extreme data points <1.5x the interquartile distance from the edges.
    Outliers are not marked; every data point is drawn with a random
    horizontal jitter instead.
    infostats: a dictionary whose 'P-value' entry is the p-value of the
    two-sided Mann-Whitney U-test.

    The mean, standard error and size of each sample, and the p-value,
    are printed.
    """
    if ax is None:
        ax = plt.gca() # if not given, get current axis

    data = [xdata, ydata]

    # Box plots (sym = '' does not mark outliers)
    bp = ax.boxplot(data, widths = 0.45, patch_artist=True, sym='')

    # add sample size to labels; enumerate yields (index, sample) pairs,
    # so the index has to be unpacked before it can be used on `labels`
    xlabels = list()
    for i, sample in enumerate(data):
        xlabels.append(labels[i] + '\n(n=' + str(len(sample)) + ')')

    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_edgecolor('black')
        patch.set_alpha(0.1)
        patch.set_linewidth(2)

    for patch in bp['whiskers']:
        patch.set(color='black', lw=3, ls='-')

    for cap in bp['caps']:
        cap.set(color='black', lw=3)

    for patch, color in zip(bp['medians'], colors):
        patch.set_color(color)
        patch.set_linewidth(3)

    # plot data points, jittered around the x position of their box (1, 2)
    for position, (points, color) in enumerate(zip(data, colors), start=1):
        xval = np.random.normal(loc = position, scale = .045, size=len(points))
        ax.plot(xval, points, 'o', color=color, ms=4)

    # remove axis and adjust
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.get_yaxis().tick_left()

    # xlabels: fix the tick positions first, then attach the labels
    ax.set_xticks([1,2])
    ax.set_xticklabels(xlabels)
    ax.xaxis.set_ticks_position('none')

    # statistics
    stats_0 = ( labels[0],np.mean(data[0]), sem(data[0]), len(data[0]) )
    stats_1 = ( labels[1],np.mean(data[1]), sem(data[1]), len(data[1]) )
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_0)
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_1)

    u_test = mwu(data[0], data[1], alternative = 'two-sided')[1]
    print('P = %2.4f, Mann-Whitney (two-sided U test)\n'%u_test)

    infostats = {'P-value': u_test}

    return ax, infostats
def plot_bars(xdata, ydata, labels, colors, ax = None):
    """
    Generate a two-sample bar plot with error bars and the single data
    points overlaid, and compare the samples with a two-sided
    Mann-Whitney U-test.

    Arguments
    ----------
    xdata   -- a list containing the first sample to plot
    ydata   -- a list containing the second sample to plot
    labels  -- a list of two strings with the variable names
    colors  -- a list of two strings with the colors of the bars

    Returns:
    ax: the axis with the bar plot of the means, error bars with the
    standard error of the mean, and the single data points drawn with a
    random horizontal jitter.
    infostats: a dictionary whose 'P-value' entry is the p-value of the
    two-sided Mann-Whitney U-test.

    The mean, standard error and size of each sample, and the p-value,
    are printed.
    """
    if ax is None:
        ax = plt.gca() # if not given, get current axis

    data = [xdata, ydata]

    yloc = (1,2) # x positions of the two bars
    avg = np.mean(data[0]), np.mean(data[1])
    myparams = dict(width = 0.65, color = colors, align = 'center', alpha = 0.5)

    # bar
    ax.bar(yloc, avg, **myparams)

    # single data points and error bars
    mycaps = dict(capsize = 10, elinewidth = 3, markeredgewidth = 3)

    yerr0 = sem(data[0])
    xloc0 = np.random.normal(loc=1, scale=0.09, size = len(data[0]))
    ax.errorbar(yloc[0], avg[0], yerr0, color=colors[0], **mycaps)
    ax.plot(xloc0, data[0], 'o', ms=4, color='k')

    yerr1 = sem(data[1])
    xloc1 = np.random.normal(loc=2, scale=0.09, size = len(data[1]))
    ax.errorbar(yloc[1], avg[1], yerr1, color=colors[1], **mycaps)
    ax.plot(xloc1, data[1], 'o', ms=4, color='k')

    # remove axis and adjust
    ax.set_xlim(0,3)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)

    # xlabels with the sample size; enumerate yields (index, sample) pairs,
    # so the index has to be unpacked before it can be used on `labels`
    xlabels = list()
    for i, sample in enumerate(data):
        xlabels.append(labels[i] + '\n(n=' + str(len(sample)) + ')')

    # fix the two tick positions before attaching the two labels to them
    ax.set_xticks([1,2])
    ax.set_xticklabels(xlabels, fontsize=14)
    ax.xaxis.set_ticks_position('none')

    # statistics
    stats_0 = ( labels[0],np.mean(data[0]), sem(data[0]), len(data[0]) )
    stats_1 = ( labels[1],np.mean(data[1]), sem(data[1]), len(data[1]) )
    print('%s = %2.4f +/- %2.4f, n = %d' %stats_0)
    print('%s = %2.4f +/- %2.4f, n = %d\n' %stats_1)

    u_test = mwu(data[0], data[1], alternative = 'two-sided')[1]
    print('P = %2.4f, Mann-Whitney (two-side U test)'%u_test)

    infostats = {'P-value': u_test}

    return ax, infostats