def scatter(files, tracksheets, ttypes, colors):
    """Plot ``prop_increase_in`` against ``muts`` for the significant samples.

    The table returned by ``_load(files)`` is left-merged with ``tracksheets``
    and a ``ttype`` column is derived by mapping ``tumor_name`` through
    ``ttypes``. Only rows with ``qvals_snr < 0.1`` and ``snr > 8`` are drawn,
    one marker per row, on a log-scaled x axis.

    Args:
        files: passed as-is to ``_load``.
        tracksheets: table merged (``how='left'``) onto the loaded data.
        ttypes: mapping applied to the ``tumor_name`` column.
        colors: lookup indexed by ``ttype`` giving each marker's colour.
    """
    nuceriod_plt.config_params(12)

    samples = _load(files).merge(tracksheets, how='left')
    samples['ttype'] = samples['tumor_name'].map(ttypes)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 4))
    ax.set_xscale('log')

    is_significant = (samples['qvals_snr'] < 0.1) & (samples['snr'] > 8)
    for _, sample in samples[is_significant].iterrows():
        ax.scatter(
            sample['muts'],
            sample['prop_increase_in'],
            s=14,
            color=colors[sample['ttype']],
        )

    plt.title('Significant')
    plt.ylabel('Proportion of increase minor in')
    plt.xlabel('Number of mutations (log)')

    # Dashed reference line at zero increase.
    plt.hlines(0, 0, 1000000, linestyle='--', color='grey', alpha=0.6)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    plt.ylim(-0.25, 0.25)
def rotational_bars(rot_high_files, rot_low_files):
    """Draw one low/high ``snr`` bar panel per ``name`` group.

    Both inputs are loaded with ``increase.load_d``, tagged in a ``rot`` column
    as ``'high'`` / ``'low'`` and concatenated. For every ``name`` group a
    two-bar panel is drawn (low first, then high), coloured with
    ``COLORS[name]``.

    Args:
        rot_high_files: passed to ``increase.load_d``; rows are tagged 'high'.
        rot_low_files: passed to ``increase.load_d``; rows are tagged 'low'.

    Raises:
        IndexError: if a group has no row for one of the two ``rot`` values.
    """
    nuceriod_plt.config_params(11)

    df_high = increase.load_d(rot_high_files)
    df_high['rot'] = 'high'
    df_low = increase.load_d(rot_low_files)
    df_low['rot'] = 'low'
    df = pd.concat([df_high, df_low])

    order = ['low', 'high']
    groups = list(df.groupby(by='name'))

    # squeeze=False always yields a 2-D array of axes. Without it a single
    # group makes plt.subplots return a bare Axes, which cannot be indexed.
    fig, axs = plt.subplots(nrows=len(groups), ncols=1, figsize=(1.75, 8),
                            squeeze=False)

    for panel, (sig, data) in zip(axs[:, 0], groups):
        yvals = [data[data['rot'] == rot]['snr'].tolist()[0] for rot in order]
        xvals = list(range(len(order)))
        colors = [COLORS[sig]] * len(order)

        panel.bar(xvals, yvals, color=colors, label=['low', 'high'])
        panel.set_xticks([0, 1])
        panel.set_xticklabels(('low', 'high'), fontsize=11)
        panel.set_ylabel('SNR')
        panel.spines['right'].set_visible(False)
        panel.spines['top'].set_visible(False)

    plt.tight_layout()
def compare(files_deconstructsigs, files_sigfit):
    """Scatter the ``snr`` of each ``name`` under deconstructsigs vs. sigfit.

    Both inputs are loaded with ``increase.load_d`` and tagged in a ``control``
    column. Only names with exactly two rows after concatenation are used.
    Each point is placed at ``log(snr)`` on both axes; when the first row of
    the pair has a negative ``cross_validation_max`` the point is mirrored to
    the negative quadrant. A least-squares line and the Pearson correlation of
    the raw ``snr`` values are added.

    Fixes relative to the previous version:
      * the ``control`` filters were swapped, so the deconstructsigs values
        ended up on the axis labelled "Sigfit" and vice versa;
      * the regression was fitted as x-on-y but drawn as y-on-x; it is now
        fitted in the same orientation as the scatter (y = sigfit,
        x = deconstructsigs).

    Args:
        files_deconstructsigs: passed to ``increase.load_d``.
        files_sigfit: passed to ``increase.load_d``.
    """
    nuceriod_plt.config_params(11)

    df_deconstructsigs = increase.load_d(files_deconstructsigs)
    df_deconstructsigs['control'] = 'deconstructsigs'
    df_sigfit = increase.load_d(files_sigfit)
    df_sigfit['control'] = 'sigfit'
    toplot = pd.concat([df_deconstructsigs, df_sigfit])

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))

    raw_decon, raw_sigfit = [], []
    log_decon, log_sigfit = [], []
    for name, data in toplot.groupby(by='name'):
        if len(data) != 2:
            continue

        snr_decon = data[data['control'] == 'deconstructsigs']['snr'].iloc[0]
        snr_sigfit = data[data['control'] == 'sigfit']['snr'].iloc[0]
        raw_decon.append(snr_decon)
        raw_sigfit.append(snr_sigfit)

        # The sign of the first row's cross_validation_max decides whether
        # the point is drawn in the positive or the mirrored quadrant.
        sign = -1 if data.iloc[0]['cross_validation_max'] < 0 else 1
        x = sign * np.log(snr_decon)
        y = sign * np.log(snr_sigfit)
        ax.scatter(x, y, c=COLORS[name])
        log_decon.append(x)
        log_sigfit.append(y)

    # NOTE(review): tick labels are powers of two while the positions are
    # natural-log values (np.log); confirm which scale is intended.
    ticks = np.arange(-6, 8, 2)
    powers = [str(2 ** abs(i)) for i in range(2, 10, 2)]
    tick_labels = powers[::-1] + powers[1:]
    plt.xticks(ticks, tick_labels)
    plt.yticks(ticks, tick_labels)

    # Fit sigfit (y axis) as a function of deconstructsigs (x axis).
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        log_decon, log_sigfit)
    plt.plot(ticks, [slope * x + intercept for x in ticks])

    R, pval = stats.pearsonr(raw_sigfit, raw_decon)
    plt.text(-4, 3, 'R = {}\npval = {}'.format(round(R, 3), round(pval, 3)))

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.ylabel('SNR (Sigfit)')
    plt.xlabel('SNR (Deconstructsig)')
    plt.tight_layout()
def autocorrelation(original_file, simulated_files):
    """Plot the motif autocorrelation of one chunk against randomized chunks.

    Each JSON file must provide ``pair_count`` (a sequence), ``motif_count``
    and ``chunk_len``. The plotted signal is
    ``(pair_count / chunk_len) / (motif_count / chunk_len) ** 2``.
    For the observed file the raw signal, its 3-point mean smoothing and a
    quadratic least-squares trend of the smoothed signal are drawn; every
    simulated file contributes one grey smoothed curve. The first four
    positions of every signal are left out of the plots and of the fit.

    Fix relative to the previous version: the randomized signals used the
    *observed* file's ``chunk_len`` in the numerator while using their own
    ``chunk_len`` in the denominator. Each file is now normalised with its own
    values (identical output whenever all chunks share the same length).

    Args:
        original_file: path of the JSON file with the observed counts.
        simulated_files: iterable of paths of JSON files with randomized
            counts.
    """
    nucperiod_plt.config_params(font_size=14)

    def load_signal(path):
        # Read one JSON chunk and normalise it with that chunk's own values.
        with open(path, 'rt') as f:
            chunk = json.load(f)
        pair_count = np.array(chunk['pair_count'])
        chunk_len = chunk['chunk_len']
        return (pair_count / chunk_len) / (chunk['motif_count'] / chunk_len) ** 2

    signal = load_signal(original_file)

    # define figure
    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    ax.set_title('motif autocorrelation')

    # raw signal
    ax.plot(range(4, len(signal)), signal[4:], alpha=0.5, label='raw')

    # 3-smoothing
    signal = spectral.mean_smooth(signal, 3)
    ax.plot(range(4, len(signal)), signal[4:], linewidth=3.0, label='3-smoothed')

    # quadratic least-squares trend of the smoothed signal
    initial_values_dict = {'a_0': np.mean(signal[4:]), 'a_1': 0., 'a_2': 0.}
    params, obj_func = non_linear.create_quadratic_model(initial_values_dict)
    x = np.arange(len(signal[4:]))
    non_linear_fitter = non_linear.NonLinearFitting(obj_func, params, x, signal[4:])
    _, predicted = non_linear_fitter.least_squares()
    ax.plot(range(4, len(signal)), predicted, '--', label='quadratic trend')

    for index, path in enumerate(simulated_files):
        random_signal = spectral.mean_smooth(load_signal(path), 3)
        # Only the first randomized curve gets a legend entry.
        label = 'randomized' if index == 0 else None
        ax.plot(range(4, len(random_signal)), random_signal[4:],
                linewidth=3.0, alpha=0.3, color='grey', label=label)

    ax.legend()
    plt.rcParams['savefig.facecolor'] = fig.get_facecolor()
def compare(cohorts_5mer, cohorts_3mer, cohorts_linker, tumors=None):
    """Scatter ``log2(snr)`` per tumour type for three ``control`` settings.

    The three inputs are loaded with ``increase.load_d`` and tagged in a
    ``control`` column as ``'mer5'``, ``'mer3'`` and ``'linker'``. After
    concatenation, ``increase_in`` and ``prop_increase_in`` are derived from
    ``observed_in`` / ``expected_in`` and ``ttype`` is mapped from ``name``
    through ``TTYPES``. One x position per ``ttype`` group carries three
    points (red: mer3, green: linker, orange: mer5).

    Fix relative to the previous version: the x tick positions were derived
    from the loop variable, which is unbound (``NameError``) when no group is
    left after filtering; they are now derived from the collected labels.

    Args:
        cohorts_5mer: passed to ``increase.load_d``; tagged 'mer5'.
        cohorts_3mer: passed to ``increase.load_d``; tagged 'mer3'.
        cohorts_linker: passed to ``increase.load_d``; tagged 'linker'.
        tumors: optional collection of ``name`` values to keep; ``None`` keeps
            every row.

    Raises:
        IndexError: if a ``ttype`` group lacks a row for one of the controls.
    """
    nuceriod_plt.config_params(14)

    df_5mer = increase.load_d(cohorts_5mer)
    df_5mer['control'] = 'mer5'
    df_3mer = increase.load_d(cohorts_3mer)
    df_3mer['control'] = 'mer3'
    df_linker = increase.load_d(cohorts_linker)
    df_linker['control'] = 'linker'

    df = pd.concat([df_5mer, df_3mer, df_linker])
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]
    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

    # (control tag, marker colour, legend text) in drawing order.
    series = (
        ('mer3', 'red', '3-mers'),
        ('linker', 'green', 'no nucleosomes in context'),
        ('mer5', 'orange', '5-mers'),
    )

    labels = []
    grouped = toplot.sort_values(by='snr', ascending=True).groupby(
        by='ttype', sort=False)
    for ix, (ttype, data) in enumerate(grouped):
        snrs = [data[data['control'] == control]['snr'].tolist()[0]
                for control, _, _ in series]
        for snr, (_, color, _) in zip(snrs, series):
            ax.scatter(ix, math.log2(snr), color=color, s=15, alpha=0.8)
        labels.append(ttype)

    plt.xticks(list(range(len(labels))), labels, rotation=90)
    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2**t) for t in tick])
    plt.ylabel('log2(SNR)')
    plt.legend(handles=[mpatches.Patch(color=color, label=text)
                        for _, color, text in series])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
def plot_bars(snr_high, snr_low):
    """Draw a small two-bar chart with the low value first, then the high one.

    Args:
        snr_high: height of the bar labelled 'high'.
        snr_low: height of the bar labelled 'low'.
    """
    nucperiod_plt.config_params()

    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(0.9, 1.1))

    positions = [0, 1]
    heights = [snr_low, snr_high]
    bar_color = '#afdde9ff'
    axs.bar(positions, heights, label=['low', 'high'],
            color=[bar_color, bar_color])

    plt.xticks(positions, ['low', 'high'], fontsize=7)
    axs.set_xlabel('SNR', fontsize=7)

    for side in ('right', 'top'):
        axs.spines[side].set_visible(False)

    plt.tight_layout()
def rotational(cohorts_high, cohorts_low, tumors=None):
    """Draw paired low/high ``log2(snr)`` bars for every tumour type.

    Both inputs are loaded with ``increase.load_d`` and tagged in a
    ``control`` column as ``'high'`` / ``'low'``. ``ttype`` is mapped from
    ``name`` through ``TTYPES``; only types present in ``COLORS`` are drawn,
    each as two adjacent bars (low, then high) sharing one tick label.

    Fixes relative to the previous version:
      * tick positions were hard-coded as ``np.arange(0.5, 54, 2)`` and thus
        only valid for exactly 27 tumour types; they now follow the number of
        groups actually drawn (same positions for 27 groups);
      * ``label=labels`` was passed to ``ax.bar`` with half as many entries as
        bars, which recent matplotlib rejects; no legend is drawn here, so the
        argument is dropped;
      * an unused bookkeeping dictionary is removed.

    Args:
        cohorts_high: passed to ``increase.load_d``; tagged 'high'.
        cohorts_low: passed to ``increase.load_d``; tagged 'low'.
        tumors: optional collection of ``name`` values to keep; ``None`` keeps
            every row.

    Raises:
        IndexError: if a drawn group lacks a row for one of the two controls.
    """
    nuceriod_plt.config_params(14)

    df_high = increase.load_d(cohorts_high)
    df_high['control'] = 'high'
    df_low = increase.load_d(cohorts_low)
    df_low['control'] = 'low'

    df = pd.concat([df_high, df_low])
    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]
    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    order = ['low', 'high']
    labels = []
    yvals = []
    colors = []
    for sig, data in toplot.sort_values(by='snr').groupby(by='ttype', sort=False):
        if sig not in COLORS:
            continue
        labels.append('{}'.format(sig))
        for control in order:
            val = data[data['control'] == control]['snr'].tolist()[0]
            yvals.append(math.log2(val))
            colors.append(COLORS[sig])
    xvals = list(range(len(yvals)))

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 1.5))
    ax.bar(xvals, yvals, color=colors)
    ax.set_ylabel('SNR')

    # One tick per group, centred between its two bars.
    tick_positions = 0.5 + len(order) * np.arange(len(labels))
    plt.xticks(tick_positions, labels, rotation=90, fontsize=13)

    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2**t) for t in tick])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
def sapiens_bars(snr_high, snr_medium, snr_low):
    """Draw a three-bar chart in the order low, medium, high.

    Args:
        snr_high: height of the third bar.
        snr_medium: height of the second bar.
        snr_low: height of the first bar.
    """
    nucperiod_plt.config_params(7)

    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(1.3, 1.5))

    positions = [0, 1, 2]
    heights = [snr_low, snr_medium, snr_high]
    bar_color = '#afdde9ff'
    axs.bar(positions, heights, label=['low', 'medium', 'high'],
            color=[bar_color] * 3)

    axs.set_xticks([0, 1, 2])
    # NOTE(review): tick names differ from the bar labels above
    # ('low'/'medium'/'high'); confirm this naming is intentional.
    axs.set_xticklabels(('very-low', 'low', 'high'), rotation=90)
    axs.set_ylabel('SNR')

    for side in ('right', 'top'):
        axs.spines[side].set_visible(False)

    plt.tight_layout()
def scatter(df, feature1, feature2, selected_organisms=None, **kwargs):
    """Scatter two columns of ``df`` per organism type, ringing significant rows.

    For each organism type a coloured marker is drawn at
    ``(feature1, feature2)``. Rows with ``qval_power_enrichment < 0.01`` and
    a positive ``feature1`` additionally get a dark-red ring. Index entries
    listed in ``selected_organisms`` are annotated with their index value.

    Fixes relative to the previous version:
      * axis labels containing ``\\pm`` are now raw strings (the non-raw form
        is an invalid escape sequence); the rendered text is unchanged;
      * the legend entry for the significance ring always uses the ring's
        line width instead of inheriting the per-point widths of the first
        organism type (which made the legend marker invisible when the first
        point was not significant);
      * axis limits are applied only for the limit keys actually supplied;
        previously any call had to pass all four or hit a ``KeyError``;
      * duplicated word in the y-axis label removed.

    Args:
        df: table with an ``org_type`` column, a ``qval_power_enrichment``
            column and the two feature columns.
        feature1: name of the column drawn on the x axis.
        feature2: name of the column drawn on the y axis.
        selected_organisms: index values to annotate; defaults to none.
        **kwargs: ``period`` (required) is inserted in the labels and title;
            ``xmin``/``xmax`` and ``ymin``/``ymax`` (optional) set the limits.

    Raises:
        KeyError: if ``period`` is not supplied.
    """
    if selected_organisms is None:
        selected_organisms = []
    period = kwargs['period']

    nucperiod_plt.config_params(font_size=24)
    fig, axes = plt.subplots(1, 1, figsize=(12, 14))

    phyllum = ['protists', 'fungi', 'plants', 'vertebrates', 'insects',
               'nematodes', 'deuterostomes']
    color_label = dict(zip(phyllum, ['blue', 'grey', 'green', 'pink', 'black',
                                     'cyan', 'orange']))

    # Empty proxy artist: only there to provide the legend entry of the ring.
    axes.scatter([], [], s=300, linewidths=2, edgecolor='#8b0000ff',
                 color='None', label='q-value < 0.01')

    for org_type in phyllum:
        ds = df[df['org_type'] == org_type]
        xs = ds[feature1].values
        ys = ds[feature2].values
        linewidths_normal = [1 for _ in xs]
        # A ring of width 2 for significant rows, no ring otherwise.
        linewidths_snr = [2 if a < 1e-2 and b > 0 else 0.
                          for a, b in zip(ds['qval_power_enrichment'].values, xs)]
        axes.scatter(xs, ys, s=350, linewidths=linewidths_snr,
                     edgecolor='#8b0000ff', color='None')
        axes.scatter(xs, ys, s=150, linewidths=linewidths_normal,
                     edgecolor='black', color=color_label[org_type],
                     label=org_type)

    for txt in df.index.values:
        if txt in selected_organisms:
            axes.annotate(txt, (df.loc[txt, feature1] + 0.02,
                                df.loc[txt, feature2] + 0.02))

    axes.set_ylabel(r'Proportion of 1 Mb chunks with MP at {0} $\pm$ 0.5 bp (ratio)'.format(period))
    axes.set_xlabel(r'Power Enrichment at {0} $\pm$ 0.5 bp (odds ratio)'.format(period))
    # Power enrichment: power significance at {0} $\pm$ 0.5 bp compared to other periods (odds ratio)
    axes.vlines(0, 0, 1, colors='red', linestyles='dashed', alpha=0.5)
    axes.legend(loc=(1.03, 0.712))
    axes.set_xticks([-3, -2, -1, 0, 1, 2, 3])
    axes.set_xticklabels([0.001, 0.01, 0.1, 1, 10, 100, 1000])
    axes.set_title('Period = {0} bp'.format(period))

    # A limit that is not supplied is passed as None, which leaves it as is.
    if 'xmin' in kwargs or 'xmax' in kwargs:
        axes.set_xlim(kwargs.get('xmin'), kwargs.get('xmax'))
    if 'ymin' in kwargs or 'ymax' in kwargs:
        axes.set_ylim(kwargs.get('ymin'), kwargs.get('ymax'))

    plt.rcParams['savefig.facecolor'] = fig.get_facecolor()
def zoomin(cohorts, tumors):
    """Plot ``log2(snr)`` against ``peak`` on two mirrored, x-sharing panels.

    Rows from ``_load(cohorts, tumors)`` are sorted by descending
    ``qvals_snr``. Rows with a positive ``cross_validation_max`` go to the
    upper panel at ``log2(snr)``; rows with a negative one go to the lower
    panel at ``-log2(snr)``. Rows with ``qvals_snr < 0.05`` and ``snr > 8``
    get a dark-red ring and their ``ttype`` written next to the marker.

    Args:
        cohorts: first argument forwarded to ``_load``.
        tumors: second argument forwarded to ``_load``.
    """
    nuceriod_plt.config_params(14)
    toplot = _load(cohorts, tumors)
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 7), sharex=True)
    toplot.sort_values(by='qvals_snr', ascending=False, inplace=True)

    def draw_half(panel, mirrored):
        # Draw the rows belonging to one panel; `mirrored` selects the rows
        # with negative cross_validation_max and flips the sign of log2(snr).
        for _, sample in toplot.iterrows():
            snr = sample['snr']
            highlighted = bool((sample['qvals_snr'] < 0.05) & (snr > 8))
            ring = 'darkred' if highlighted else None

            cv_max = sample['cross_validation_max']
            belongs_here = cv_max < 0 if mirrored else cv_max > 0
            if not belongs_here:
                continue

            height = -np.log2(snr) if mirrored else np.log2(snr)
            # White disc carrying the (optional) ring, then the coloured marker.
            panel.scatter(sample['peak'], height, s=200, linewidth=2,
                          edgecolor=ring, color='white',
                          label=sample['name'], alpha=1)
            panel.scatter(sample['peak'], height, s=80, edgecolor='grey',
                          linewidth=0.5, color=COLORS[sample['ttype']],
                          label=sample['name'], alpha=1)
            if highlighted:
                panel.text(sample['peak'] + 1, height - 0.15, sample['ttype'])

    draw_half(ax[1], mirrored=True)
    draw_half(ax[0], mirrored=False)

    plt.xlabel('Period')

    period_ticks = list(range(8, 22, 2))
    for panel in (ax[0], ax[1]):
        panel.set_xticks(period_ticks)

    upper_ticks = list(range(2, 10, 2))
    lower_ticks = list(range(-8, 0, 2))
    ax[0].set_yticks(upper_ticks)
    ax[1].set_yticks(lower_ticks)
    # Ticks sit at log2 positions; labels show the corresponding power of two.
    ax[0].set_yticklabels([str(2 ** abs(t)) for t in upper_ticks])
    ax[1].set_yticklabels([str(2 ** abs(t)) for t in lower_ticks])

    ax[0].spines['right'].set_visible(False)
    ax[0].spines['top'].set_visible(False)

    ax[1].xaxis.set_ticks_position('top')
    ax[1].spines['bottom'].set_visible(False)
    ax[1].spines['right'].set_visible(False)

    ax[0].set_ylim(1.5, 10)
    ax[1].set_ylim(-10, -1.5)
    plt.tight_layout()
def zoomout(files):
    """Plot ``log2(snr)`` against ``peak`` on two mirrored, x-sharing panels.

    Only rows of ``_load(files)`` with ``nmuts_whole_nucleosome > 10000`` are
    used. Rows with a positive ``cross_validation_max`` go to the upper panel
    at ``log2(snr)``; rows with a negative one go to the lower panel at
    ``-log2(snr)``. Rows with ``qvals_snr < 0.05`` and ``snr > 8`` get a
    dark-red ring and their ``outname`` written next to the marker.

    Args:
        files: forwarded to ``_load``.
    """
    nuceriod_plt.config_params(14)

    df = _load(files)
    df = df[df['nmuts_whole_nucleosome'] > 10000]
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 7), sharex=True)

    def draw_half(panel, mirrored):
        # Draw the rows belonging to one panel; `mirrored` selects the rows
        # with negative cross_validation_max and flips the sign of log2(snr).
        for _, sample in df.iterrows():
            snr = sample['snr']
            highlighted = bool(sample['qvals_snr'] < 0.05 and snr > 8)
            ring = 'darkred' if highlighted else None

            cv_max = sample['cross_validation_max']
            belongs_here = cv_max < 0 if mirrored else cv_max > 0
            if not belongs_here:
                continue

            height = -np.log2(snr) if mirrored else np.log2(snr)
            # White disc carrying the (optional) ring, then the coloured marker.
            panel.scatter(sample['peak'], height, s=200, linewidth=2,
                          edgecolor=ring, color='white',
                          label=sample['name'], alpha=1)
            panel.scatter(sample['peak'], height, s=80, edgecolor='grey',
                          linewidth=0.5, color=COLORS[sample['name']],
                          label=sample['name'], alpha=1)
            if highlighted:
                panel.text(sample['peak'] + 10, height - 0.15,
                           sample['outname'])

    draw_half(ax[0], mirrored=False)
    draw_half(ax[1], mirrored=True)

    plt.ylabel('log2(SNR)')
    plt.xlabel('Period')

    period_ticks = list(range(50, 260, 20))
    for panel in (ax[0], ax[1]):
        panel.set_xticks(period_ticks)

    upper_ticks = list(range(2, 10, 2))
    lower_ticks = list(range(-8, 0, 2))
    ax[0].set_yticks(upper_ticks)
    ax[1].set_yticks(lower_ticks)
    # Ticks sit at log2 positions; labels show the corresponding power of two.
    ax[0].set_yticklabels([str(2 ** abs(t)) for t in upper_ticks])
    ax[1].set_yticklabels([str(2 ** abs(t)) for t in lower_ticks])

    ax[0].spines['right'].set_visible(False)
    ax[0].spines['top'].set_visible(False)

    ax[1].xaxis.set_ticks_position('top')
    ax[1].spines['bottom'].set_visible(False)
    ax[1].spines['right'].set_visible(False)

    ax[0].set_ylim(1.5, 10)
    ax[1].set_ylim(-10, -1.5)
    plt.tight_layout()
def sigmoid(files, tracksheets, ttypes, colors):
    """Draw, per tumour type, the sorted ``prop_increase_in`` of its samples.

    The table from ``_load(files)`` is left-merged with ``tracksheets`` and
    ``ttype`` is mapped from ``tumor_name`` through ``ttypes``. Tumour types
    with more than 10 samples get one panel each, ordered by their median
    ``prop_increase_in``. Within a panel the samples are drawn in ascending
    order; samples with ``qvals_snr < 0.1`` and ``snr > 8`` are dark red, the
    rest grey. The median is marked with a green line and the panel is
    annotated with the sample count and the percentage of significant
    samples.

    Fix relative to the previous version: ``plt.subplots`` is called with
    ``squeeze=False`` so that the axes can be indexed even when a single
    tumour type qualifies (a bare Axes was returned in that case).

    Args:
        files: forwarded to ``_load``.
        tracksheets: table merged (``how='left'``) onto the loaded data.
        ttypes: mapping applied to the ``tumor_name`` column.
        colors: lookup indexed by tumour type for the strip under each panel.
    """
    nuceriod_plt.config_params(12)

    df = _load(files)
    toplot = df.merge(tracksheets, how='left')
    toplot['ttype'] = toplot['tumor_name'].map(ttypes)

    ttype_inc = {}          # median prop_increase_in per tumour type
    ttype_vals = {}         # [(prop_increase_in, is_significant), ...] sorted
    prop_significant = {}   # percentage of significant samples
    for ttype, data in toplot.groupby(by='ttype'):
        if len(data) <= 10:
            continue
        d = data.sort_values(by='prop_increase_in', ascending=True)
        ttype_inc[ttype] = d['prop_increase_in'].median()
        is_significant = ((d['qvals_snr'] < 0.1) & (d['snr'] > 8)).tolist()
        ttype_vals[ttype] = list(zip(d['prop_increase_in'].tolist(),
                                     is_significant))
        prop_significant[ttype] = 100 * sum(is_significant) / len(data)

    # squeeze=False keeps a 2-D array of axes whatever the number of panels.
    fig, axes = plt.subplots(nrows=1, ncols=len(ttype_inc), figsize=(17, 3.5),
                             sharey=True, squeeze=False)
    ax = axes[0]
    ax[0].set_ylabel('Relative increase in mutation rate')
    ax[0].yaxis.set_ticks(np.arange(-0.3, 0.3, 0.1))

    for index, t in enumerate(sorted(ttype_inc, key=ttype_inc.get)):
        panel = ax[index]
        count = len(ttype_vals[t])
        for position, (val, significant) in enumerate(ttype_vals[t]):
            panel.scatter(position, val,
                          color='darkred' if significant else 'grey',
                          s=10, lw=0, alpha=1 if significant else 0.3)

        # Median marker spanning the central 40% of the panel.
        panel.hlines(ttype_inc[t], count / 2 - count * 0.4 / 2,
                     count / 2 + count * 0.4 / 2, lw=2, color='darkgreen')

        for side in ('top', 'right', 'bottom', 'left'):
            panel.spines[side].set_visible(False)
        panel.set_xlabel(t, rotation=90)
        panel.xaxis.set_ticks_position('none')
        if index > 0:
            panel.yaxis.set_ticks_position('none')

        # Blank out the x tick labels while keeping the ticks themselves.
        labels = [item.get_text() for item in panel.get_xticklabels()]
        panel.set_xticklabels([''] * len(labels))

        panel.text(ttype_inc[t], 0.22,
                   'n={}\n{}%'.format(count, round(prop_significant[t], 1)))
        panel.set_ylim(-0.2, 0.3)
        # NOTE(review): both `lw` and `linewidth` are passed (aliases of one
        # another); kept as in the original call, confirm which is intended.
        panel.add_patch(
            plt.Rectangle((0, -0.8), count, 0.03, color=colors[t], lw=1,
                          clip_on=False, linewidth=0))

    red_dot = mlines.Line2D([], [], color='darkred', marker='o',
                            linestyle='None', markersize=5,
                            label='Significant sample')
    grey_dot = mlines.Line2D([], [], color='grey', marker='o',
                             linestyle='None', markersize=5,
                             label='Non significant sample')
    median_line = mlines.Line2D([], [], color='darkgreen', marker='_',
                                linestyle='None', lw=40, markersize=10,
                                label='Median')
    plt.legend(handles=[red_dot, grey_dot, median_line],
               bbox_to_anchor=[1.1, 1.1])
def plot_single(table, title):
    """Draw a 2x4 summary figure from a tab-separated per-chunk table.

    Columns 0-2 hold box plots, one box per period from 6 to 19, of the
    ``snr_{p}p``, ``fold_power_increase_{p}p`` and ``fold_snr_increase_{p}p``
    columns. The top row uses every row with ``5.5 < maxp < 19.5``; the bottom
    row uses, for each period ``p``, only the rows with
    ``p - 0.5 < maxp < p + 0.5`` and overlays the number of such rows divided
    by 10. Column 3 shows, per period, the number of rows whose
    ``logpval_power_{p}p`` (top) or ``logpval_snr_{p}p`` (bottom) equals 2,
    with the bar of period 10 in red and a dashed line at the mean.

    Fix relative to the previous version: one bottom-row panel wrapped the
    per-period value arrays in ``np.array`` before calling ``boxplot``. The
    arrays have different lengths, so this fails on recent NumPy (and would
    transpose the data if the lengths happened to match). The list is now
    passed directly, as in every other panel.

    Args:
        table: path (or buffer) of the tab-separated table, read with
            ``pd.read_csv``.
        title: figure super-title.
    """
    nucperiod_plt.config_params(font_size=10)
    ylim = 60
    periods = list(range(6, 20))
    box_positions = range(1, len(periods) + 1)

    table = pd.read_csv(table, sep='\t')
    fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(20, 12))

    # Rows used by the top-row box plots.
    all_chunks = table[(table['maxp'] < 19.5) & (table['maxp'] > 5.5)]
    # Rows used by the bottom-row box plots, one subset per period.
    peaking_chunks = [
        table[(table['maxp'] < p + 0.5) & (table['maxp'] > p - 0.5)]
        for p in periods
    ]
    scaled_counts = [len(chunk) / 10 for chunk in peaking_chunks]

    def box_panel(axis, values, ylabel, panel_title, clip, with_counts):
        # One box per period; optionally overlay the scaled subset sizes.
        axis.boxplot(values)
        if with_counts:
            axis.plot(box_positions, scaled_counts, label='x10 no. chunks')
        axis.set_xticklabels(periods)
        if clip:
            axis.set_ylim(-3, ylim)
        axis.set_xlabel('Period (bp)')
        axis.set_ylabel(ylabel)
        axis.set_title(panel_title)
        if with_counts:
            axis.legend()

    def enrichment_panel(axis, template, ylabel, panel_title):
        # Bar chart of the number of rows whose column equals 2, per period.
        discoveries = [len(table[table[template.format(p)] == 2])
                       for p in periods]
        barlist = axis.bar(periods, discoveries)
        barlist[periods.index(10)].set_color('r')
        axis.hlines(np.mean(np.array(discoveries)), 5.6, 19.4,
                    linestyles='dashed', colors='grey')
        axis.set_xticks(periods)
        axis.set_xlabel('Period (bp)')
        axis.set_ylabel(ylabel)
        axis.set_title(panel_title)

    # (column template, y label, top title, bottom title, clip y axis)
    metrics = (
        ('snr_{0}p', 'SNR',
         'SNR for all 1 Mb chunks',
         'SNR for 1 Mb chunks\n peaking at each period',
         False),
        ('fold_power_increase_{0}p', 'Fold Power Increase',
         'Fold power increase for all 1 Mb chunks',
         'Fold Power Increase for 1 Mb chunks\n peaking at each period',
         True),
        ('fold_snr_increase_{0}p', 'Fold SNR Increase',
         'Fold SNR Increase for all 1 Mb chunks',
         'Fold SNR Increase for 1 Mb chunks\n peaking at each period',
         True),
    )
    for col, (template, ylabel, title_all, title_peaking, clip) in enumerate(metrics):
        columns = [template.format(p) for p in periods]
        box_panel(ax[0, col],
                  [all_chunks[name].values for name in columns],
                  ylabel, title_all, clip, with_counts=False)
        box_panel(ax[1, col],
                  [chunk[name].values
                   for chunk, name in zip(peaking_chunks, columns)],
                  ylabel, title_peaking, clip, with_counts=True)

    enrichment_panel(ax[0, 3], 'logpval_power_{0}p',
                     'No. chunks significantly high in power',
                     'Power Enrichment')
    enrichment_panel(ax[1, 3], 'logpval_snr_{0}p',
                     'No. chunks significantly high in SNR',
                     'SNR Enrichment')

    fig.suptitle(title)
    plt.rcParams['savefig.facecolor'] = fig.get_facecolor()
def spectrum(original_file, simulated_files):
    """Plot the periodogram of one chunk against those of randomized chunks.

    Each JSON file must provide ``pair_count`` (a sequence), ``motif_count``
    and ``chunk_len``. The signal
    ``(pair_count / chunk_len) / (motif_count / chunk_len) ** 2`` is smoothed
    with a 3-point mean, its first four positions are dropped and a quadratic
    least-squares trend is subtracted. The result is handed to
    ``dtft_spectrum_plot``: grey for every simulated file, then the observed
    one on top.

    Fix relative to the previous version: the randomized signals used the
    *observed* file's ``chunk_len`` in the numerator while using their own
    ``chunk_len`` in the denominator. Each file is now normalised with its own
    values (identical output whenever all chunks share the same length).

    Args:
        original_file: path of the JSON file with the observed counts.
        simulated_files: iterable of paths of JSON files with randomized
            counts.
    """
    nucperiod_plt.config_params(font_size=14)

    def load_signal(path):
        # Read one JSON chunk and normalise it with that chunk's own values.
        with open(path, 'rt') as f:
            chunk = json.load(f)
        pair_count = np.array(chunk['pair_count'])
        chunk_len = chunk['chunk_len']
        return (pair_count / chunk_len) / (chunk['motif_count'] / chunk_len) ** 2

    def smooth_and_detrend(raw):
        # 3-point mean smoothing, then removal of a fitted quadratic trend.
        tail = spectral.mean_smooth(raw, 3)[4:]
        initial_values_dict = {'a_0': np.mean(tail), 'a_1': 0., 'a_2': 0.}
        params, obj_func = non_linear.create_quadratic_model(initial_values_dict)
        x = np.arange(len(tail))
        non_linear_fitter = non_linear.NonLinearFitting(obj_func, params, x, tail)
        _, predicted = non_linear_fitter.least_squares()
        return tail - predicted

    # define figure
    fig_spec, ax_spec = plt.subplots(1, 1, figsize=(10, 5))
    ax_spec.set_xlabel('period (bp)')
    ax_spec.set_ylabel('power')

    signal_detrended = smooth_and_detrend(load_signal(original_file))

    for index, path in enumerate(simulated_files):
        random_signal_detrended = smooth_and_detrend(load_signal(path))
        # Only the first randomized curve gets a legend entry.
        label = 'randomized' if index == 0 else None
        dtft_spectrum_plot(random_signal_detrended, ax_spec,
                           title='periodogram', norm=False, color='grey',
                           alpha=0.5, label=label)

    dtft_spectrum_plot(signal_detrended, ax_spec, title='periodogram',
                       norm=False, label='observed')
    ax_spec.legend()
    plt.rcParams['savefig.facecolor'] = fig_spec.get_facecolor()