def rotational_bars(rot_high_files, rot_low_files):
    """Draw one low-vs-high SNR bar panel per signature.

    Both inputs are loaded with ``increase.load_d``, tagged with a ``rot``
    column ('high' / 'low') and concatenated. One subplot row is created per
    distinct ``name``; each panel shows the ``snr`` of the 'low' and 'high'
    rows, coloured with ``COLORS[name]``.

    Args:
        rot_high_files: input forwarded to ``increase.load_d`` for the
            high-rotational set.
        rot_low_files: input forwarded to ``increase.load_d`` for the
            low-rotational set.

    Raises:
        IndexError: if a signature lacks a 'low' or a 'high' row.
        KeyError: if a signature name is missing from ``COLORS``.
    """
    nuceriod_plt.config_params(11)

    df_high = increase.load_d(rot_high_files)
    df_high['rot'] = 'high'
    df_low = increase.load_d(rot_low_files)
    df_low['rot'] = 'low'
    df = pd.concat([df_high, df_low])

    order = ['low', 'high']

    # Group once and reuse the result for both the panel count and the loop.
    groups = list(df.groupby(by='name'))

    # squeeze=False keeps `axs` a 2-D array even when there is a single
    # signature; otherwise plt.subplots returns a bare Axes and indexing fails.
    fig, axs = plt.subplots(nrows=len(groups), ncols=1, figsize=(1.75, 8),
                            squeeze=False)

    for ax, (sig, data) in zip(axs[:, 0], groups):
        # First matching row per rotational class, in 'low', 'high' order.
        yvals = [data[data['rot'] == rot]['snr'].tolist()[0] for rot in order]
        xvals = list(range(len(order)))
        colors = [COLORS[sig]] * len(order)

        ax.bar(xvals, yvals, color=colors, label=['low', 'high'])
        ax.set_xticks([0, 1])
        ax.set_xticklabels(('low', 'high'), fontsize=11)
        ax.set_ylabel('SNR')
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

    plt.tight_layout()
def compare(files_deconstructsigs, files_sigfit):
    """Scatter the deconstructSigs SNR against the sigfit SNR per signature.

    Both inputs are loaded with ``increase.load_d`` and tagged through a
    ``control`` column. Only names with exactly two rows after concatenation
    are plotted. Each point is placed at ``log2(snr)``; both coordinates are
    negated when ``cross_validation_max`` of the group's first row is
    negative. A least-squares line and the Pearson correlation of the raw
    SNR values are added to the figure.

    Args:
        files_deconstructsigs: input forwarded to ``increase.load_d`` for the
            deconstructSigs results (x axis).
        files_sigfit: input forwarded to ``increase.load_d`` for the sigfit
            results (y axis).
    """
    nuceriod_plt.config_params(11)

    df_deconstructsigs = increase.load_d(files_deconstructsigs)
    df_deconstructsigs['control'] = 'deconstructsigs'
    df_sigfit = increase.load_d(files_sigfit)
    df_sigfit['control'] = 'sigfit'
    toplot = pd.concat([df_deconstructsigs, df_sigfit])

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))

    raw_decon, raw_sigfit = [], []   # untransformed SNR, for the correlation
    log_decon, log_sigfit = [], []   # signed log2 SNR, as plotted

    for name, data in toplot.groupby(by='name'):
        if len(data) != 2:
            continue

        # The filters used to be swapped, so the axes showed the opposite
        # method from the one written on their labels.
        snr_decon = data[data['control'] == 'deconstructsigs']['snr'].iloc[0]
        snr_sigfit = data[data['control'] == 'sigfit']['snr'].iloc[0]
        raw_decon.append(snr_decon)
        raw_sigfit.append(snr_sigfit)

        # A negative cross-validation maximum mirrors the point through the
        # origin. log2 (not the natural log) matches the power-of-two ticks.
        sign = -1 if data.iloc[0]['cross_validation_max'] < 0 else 1
        x = sign * np.log2(snr_decon)
        y = sign * np.log2(snr_sigfit)
        ax.scatter(x, y, c=COLORS[name])
        log_decon.append(x)
        log_sigfit.append(y)

    # Tick t sits at log2(SNR) == |t|, so it is labelled 2**|t|.
    ticks = np.arange(-6, 8, 2)
    tick_labels = [str(2 ** abs(int(t))) for t in ticks]
    plt.xticks(ticks, tick_labels)
    plt.yticks(ticks, tick_labels)

    # Fit y (sigfit) on x (deconstructSigs) so the line matches the scatter.
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        log_decon, log_sigfit)
    plt.plot(ticks, slope * ticks + intercept)

    R, pval = stats.pearsonr(raw_decon, raw_sigfit)
    plt.text(-4, 3, 'R = {}\npval = {}'.format(round(R, 3), round(pval, 3)))

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.ylabel('SNR (Sigfit)')
    plt.xlabel('SNR (Deconstructsig)')
    plt.tight_layout()
def compare(cohorts_5mer, cohorts_3mer, cohorts_linker, tumors=None):
    """Plot, per tumor type, the SNR obtained with three different controls.

    The three inputs are loaded with ``increase.load_d``, tagged through a
    ``control`` column ('mer5', 'mer3', 'linker') and concatenated. Tumor
    types (``TTYPES[name]``) are laid out along the x axis in order of first
    appearance after sorting by ``snr``; each one gets three points at
    ``log2(snr)``: red (3-mers), green (linker) and orange (5-mers).

    Args:
        cohorts_5mer: input forwarded to ``increase.load_d`` (5-mer control).
        cohorts_3mer: input forwarded to ``increase.load_d`` (3-mer control).
        cohorts_linker: input forwarded to ``increase.load_d`` (linker control).
        tumors: optional collection of ``name`` values to keep; ``None`` keeps
            every row.
    """
    nuceriod_plt.config_params(14)

    # Load in the same order as the concatenation: 5-mer, 3-mer, linker.
    tagged = []
    for source, tag in ((cohorts_5mer, 'mer5'),
                        (cohorts_3mer, 'mer3'),
                        (cohorts_linker, 'linker')):
        frame = increase.load_d(source)
        frame['control'] = tag
        tagged.append(frame)
    df = pd.concat(tagged)

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]
    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

    # (control tag, colour, legend text) in drawing and legend order.
    series = (
        ('mer3', 'red', '3-mers'),
        ('linker', 'green', 'no nucleosomes in context'),
        ('mer5', 'orange', '5-mers'),
    )
    legend_handles = [mpatches.Patch(color=color, label=text)
                      for _, color, text in series]

    by_ttype = toplot.sort_values(by='snr', ascending=True).groupby(
        by='ttype', sort=False)

    labels = []
    for ix, (ttype, data) in enumerate(by_ttype):
        # Look up all three values before drawing anything for this column.
        snrs = [data[data['control'] == tag]['snr'].tolist()[0]
                for tag, _, _ in series]
        for (_, color, _), snr in zip(series, snrs):
            ax.scatter(ix, math.log2(snr), color=color, s=15, alpha=0.8)
        labels.append(ttype)

    plt.xticks(list(range(ix + 1)), labels, rotation=90)

    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2 ** exponent) for exponent in tick])
    plt.ylabel('log2(SNR)')
    plt.legend(handles=legend_handles)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    plt.tight_layout()
def rotational(cohorts_high, cohorts_low, tumors=None):
    """Bar plot of log2(SNR) for low and high rotational sets per tumor type.

    Both inputs are loaded with ``increase.load_d``, tagged through a
    ``control`` column ('high' / 'low') and concatenated. Tumor types
    (``TTYPES[name]``) that have an entry in ``COLORS`` get two adjacent bars
    ('low' then 'high'), ordered by first appearance after sorting by ``snr``.

    Args:
        cohorts_high: input forwarded to ``increase.load_d`` (high set).
        cohorts_low: input forwarded to ``increase.load_d`` (low set).
        tumors: optional collection of ``name`` values to keep; ``None`` keeps
            every row.

    Raises:
        IndexError: if a plotted tumor type lacks a 'low' or a 'high' row.
    """
    nuceriod_plt.config_params(14)

    df_high = increase.load_d(cohorts_high)
    df_high['control'] = 'high'
    df_low = increase.load_d(cohorts_low)
    df_low['control'] = 'low'
    df = pd.concat([df_high, df_low])

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]
    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    order = ['low', 'high']
    labels = []
    yvals = []
    colors = []
    for sig, data in toplot.sort_values(by='snr').groupby(by='ttype', sort=False):
        if sig not in COLORS:
            continue
        labels.append('{}'.format(sig))
        for rot in order:
            val = data[data['control'] == rot]['snr'].tolist()[0]
            yvals.append(math.log2(val))
            colors.append(COLORS[sig])
    xvals = list(range(len(yvals)))

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 1.5))

    # No `label=` here: there is one label per group but two bars per group,
    # and recent matplotlib requires a list label to match the bar count.
    # No legend is drawn, so nothing is lost.
    ax.bar(xvals, yvals, color=colors)
    ax.set_ylabel('SNR')

    # One tick centred between each low/high pair (0.5, 2.5, ...). Derived
    # from the number of groups instead of assuming exactly 27 of them.
    tick_positions = np.arange(len(labels)) * len(order) + 0.5
    plt.xticks(tick_positions, labels, rotation=90, fontsize=13)

    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2 ** t) for t in tick])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
def _load(files):
    """Load the results table and add derived columns.

    Added columns:
        increase_in: ``observed_in - expected_in``.
        prop_increase_in: ``increase_in / expected_in``.
        outname: ``name`` with 'Signature_' replaced by 'Sign '.
        qvals_snr: ``empirical_pvalue_snr`` corrected with
            ``multipletests(..., method='fdr_bh')``.

    Before the correction, p-values equal to 0 are set to 0.001 and missing
    p-values are set to 1.

    Args:
        files: input forwarded to ``increase.load_d``.

    Returns:
        The loaded DataFrame with the columns above.
    """
    df = increase.load_d(files)
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['outname'] = df['name'].str.replace('Signature_', 'Sign ')

    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form is not guaranteed to modify
    # `df` and does nothing under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].replace(0, 0.001).fillna(1)
    )

    # Element 1 of the multipletests result holds the corrected p-values.
    corrected = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = corrected[1]
    return df
def find(files, min_muts=None):
    """Return, for each tumor type, the name with the most mutations.

    Rows are ranked by ``nmuts_whole_nucleosome`` (descending) and the first
    ``name`` seen for every ``ttype`` (``TTYPES[name]``) is kept.

    Args:
        files: input forwarded to ``increase.load_d``.
        min_muts: optional threshold; when given, only rows with
            ``nmuts_whole_nucleosome`` strictly greater than it are kept.

    Returns:
        List of names, ordered by decreasing ``nmuts_whole_nucleosome``.
    """
    df = increase.load_d(files)
    df['ttype'] = df['name'].map(TTYPES)
    if min_muts is not None:
        df = df[df['nmuts_whole_nucleosome'] > min_muts]

    # Sort into a new frame: an in-place sort on the filtered slice above
    # triggers pandas' SettingWithCopyWarning.
    ranked = df.sort_values(by='nmuts_whole_nucleosome', ascending=False)

    seen_ttypes = set()
    names = []
    # Only two columns are needed, so iterate them directly instead of
    # building a Series per row with iterrows().
    for name, ttype in zip(ranked['name'], ranked['ttype']):
        if ttype in seen_ttypes:
            continue
        seen_ttypes.add(ttype)
        names.append(name)
    return names
def _load(cohorts, tumors):
    """Load the cohort results restricted to ``tumors`` and add derived columns.

    Added columns:
        increase_in: ``observed_in - expected_in``.
        prop_increase_in: ``increase_in / expected_in``.
        ttype: ``TTYPES[name]``.
        qvals_snr: ``empirical_pvalue_snr`` corrected with
            ``multipletests(..., method='fdr_bh')``, computed on the selected
            rows only.

    Before the correction, missing p-values are set to 1 and p-values equal
    to 0 are set to 0.001.

    Args:
        cohorts: input forwarded to ``increase.load_d``.
        tumors: collection of ``name`` values to keep.

    Returns:
        The filtered DataFrame sorted by ``prop_increase_in`` (ascending).
    """
    df = increase.load_d(cohorts)
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)
    df = df[df['name'].isin(tumors)].sort_values(by='prop_increase_in', ascending=True)

    # add Q-value
    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form is not guaranteed to modify
    # `df` and does nothing under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].fillna(1).replace(0, 0.001)
    )

    # Element 1 of the multipletests result holds the corrected p-values.
    corrected = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = corrected[1]
    return df
def _load(files):
    """Load the results with more than 1000 mutations and add derived columns.

    Added columns:
        increase_in: ``observed_in - expected_in``.
        prop_increase_in: ``increase_in / expected_in``.
        qvals_snr: ``empirical_pvalue_snr`` corrected with
            ``multipletests(..., method='fdr_bh')``, computed on the kept
            rows only.
        corrected_snr: result of applying ``_sign_SNR`` to every row.

    Before the correction, missing p-values are set to 1 and p-values equal
    to 0 are set to 0.001.

    Args:
        files: input forwarded to ``increase.load_d``.

    Returns:
        DataFrame sorted by ``prop_increase_in`` (ascending), restricted to
        rows with ``nmuts_whole_nucleosome > 1000``.
    """
    df = increase.load_d(files)
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df = df.sort_values(by='prop_increase_in', ascending=True)

    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the sorted one.
    df = df[df['nmuts_whole_nucleosome'] > 1000].copy()

    # add Q-value
    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form is not guaranteed to modify
    # `df` and does nothing under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].fillna(1).replace(0, 0.001)
    )

    # Element 1 of the multipletests result holds the corrected p-values.
    corrected = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = corrected[1]

    df['corrected_snr'] = df.apply(_sign_SNR, axis=1)
    return df
def cohorts(files, colors):
    """Two-panel scatter of log2(SNR) per signature (Signature_1 .. Signature_30).

    ``name`` is split on '__' into tumor type (first part) and signature name
    (second part). Rows with a positive ``cross_validation_max`` go to the
    upper panel at ``log2(snr)``; rows with a negative one go to the lower
    panel at ``-log2(snr)``; any other row is skipped. A point is highlighted
    (tumor-type colour, black edge) when ``qvals_snr < 0.05``, ``snr > 8`` and
    ``10 < peak < 10.4``; otherwise it is drawn small and grey.

    Args:
        files: input forwarded to ``increase.load_d``.
        colors: mapping from tumor type to a matplotlib colour.
    """
    nuceriod_plt.config_params_full(font_size=7)

    df = increase.load_d(files)
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    name_parts = df['name'].str.split('__')
    df['signature_name'] = name_parts.str.get(1)
    df['ttype'] = name_parts.str.get(0)

    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form is not guaranteed to modify
    # `df` and does nothing under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].replace(0, 0.001).fillna(1)
    )
    # Element 1 of the multipletests result holds the corrected p-values.
    corrected = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = corrected[1]

    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(7, 2))

    signature_groups = df.groupby('signature_name')
    for number in range(1, 31):
        try:
            data = signature_groups.get_group('Signature_{}'.format(number))
        except KeyError:
            # No cohort carries this signature: leave its column empty.
            continue

        column = number - 1
        for _, row in data.iterrows():
            cv_max = row['cross_validation_max']
            if cv_max > 0:
                ax, sign, grey_size = axs[0], 1, 6
            elif cv_max < 0:
                # Grey points are slightly larger here (8 vs 6), as before.
                ax, sign, grey_size = axs[1], -1, 8
            else:
                # Zero or NaN: belongs to neither panel.
                continue

            yval = sign * np.log2(row['snr'])
            highlighted = (row['qvals_snr'] < 0.05
                           and row['snr'] > 8
                           and 10 < row['peak'] < 10.4)
            if highlighted:
                ax.scatter(jitter(column), yval, c=colors[row['ttype']],
                           s=25, edgecolor='black', linewidth=0.5)
            else:
                ax.scatter(jitter(column), yval, c='grey', s=grey_size)

    # Ticks sit at +/- log2(SNR) and are labelled with the SNR magnitude.
    upper_ticks = list(range(2, 10, 2))
    lower_ticks = list(range(-8, 0, 2))
    axs[0].set_yticks(upper_ticks)
    axs[1].set_yticks(lower_ticks)
    axs[0].set_yticklabels([str(2 ** t) for t in upper_ticks])
    axs[1].set_yticklabels([str(2 ** abs(t)) for t in lower_ticks])

    axs[0].spines['right'].set_visible(False)
    axs[0].spines['top'].set_visible(False)
    axs[0].set_xlim(-1, 30)

    # Fix the tick positions before the labels; labelling the automatic
    # ticks first was redundant and made matplotlib warn.
    axs[1].set_xticks(list(range(0, 30)))
    axs[1].set_xticklabels(list(range(1, 31)))

    axs[1].xaxis.set_ticks_position('top')
    axs[1].spines['bottom'].set_visible(False)
    axs[1].spines['right'].set_visible(False)
    axs[0].set_ylim(1.5, 10)
    axs[1].set_ylim(-10, -1.5)
    plt.tight_layout()