def rotational_bars(rot_high_files, rot_low_files):
    """Draw one small bar panel per signature comparing low vs. high SNR.

    Both inputs are passed to ``increase.load_d``; the resulting tables are
    tagged with a ``rot`` column (``'high'`` / ``'low'``) and concatenated.
    For every distinct ``name`` a panel with two bars (low, high) is drawn,
    coloured with ``COLORS[name]``.

    Args:
        rot_high_files: input forwarded to ``increase.load_d`` for the
            "high" group.
        rot_low_files: input forwarded to ``increase.load_d`` for the
            "low" group.

    Raises:
        IndexError: if a signature lacks a row for either group.
        KeyError: if a signature name is not a key of ``COLORS``.
    """
    nuceriod_plt.config_params(11)

    df_high = increase.load_d(rot_high_files)
    df_high['rot'] = 'high'

    df_low = increase.load_d(rot_low_files)
    df_low['rot'] = 'low'

    df = pd.concat([df_high, df_low])

    # Materialise the groups once: they are needed both for the panel count
    # and for the drawing loop.
    groups = list(df.groupby(by='name'))

    # squeeze=False keeps ``axs`` a 2-D array even when there is a single
    # signature; otherwise ``axs[ix]`` fails on a bare Axes object.
    fig, axs = plt.subplots(nrows=len(groups),
                            ncols=1,
                            figsize=(1.75, 8),
                            squeeze=False)
    axs = axs[:, 0]
    order = ['low', 'high']

    for ax, (sig, data) in zip(axs, groups):
        # First SNR value found for each group, in plotting order.
        yvals = [data[data['rot'] == level]['snr'].tolist()[0]
                 for level in order]
        xvals = list(range(len(order)))
        colors = [COLORS[sig]] * len(order)

        ax.bar(xvals, yvals, color=colors, label=['low', 'high'])
        ax.set_xticks([0, 1])
        ax.set_xticklabels(('low', 'high'), fontsize=11)
        ax.set_ylabel('SNR')
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
    plt.tight_layout()
def compare(files_deconstructsigs, files_sigfit):
    """Scatter the signed log2(SNR) of deconstructSigs against sigfit.

    Both inputs are loaded through ``increase.load_d`` and tagged with a
    ``control`` column. Every ``name`` that has exactly two rows contributes
    one point: x is the deconstructSigs SNR and y the sigfit SNR, both as
    log2, negated when the first row's ``cross_validation_max`` is negative.
    A least-squares line (y on x) is drawn, and the Pearson correlation of
    the raw SNR values is written on the plot.

    Args:
        files_deconstructsigs: input forwarded to ``increase.load_d`` for
            the deconstructSigs results.
        files_sigfit: input forwarded to ``increase.load_d`` for the sigfit
            results.
    """
    nuceriod_plt.config_params(11)

    df_deconstructsigs = increase.load_d(files_deconstructsigs)
    df_deconstructsigs['control'] = 'deconstructsigs'

    df_sigfit = increase.load_d(files_sigfit)
    df_sigfit['control'] = 'sigfit'

    toplot = pd.concat([df_deconstructsigs, df_sigfit])

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))
    raw_decon, raw_sigfit = [], []
    log_decon, log_sigfit = [], []

    for name, data in toplot.groupby(by='name'):
        # Only names with exactly two rows are compared.
        if len(data) != 2:
            continue
        # Each variable now holds the method it is named after; previously
        # the two filters were swapped, so the axes were mislabelled.
        snr_decon = data[data['control'] ==
                         'deconstructsigs']['snr'].iloc[0]
        snr_sigfit = data[data['control'] == 'sigfit']['snr'].iloc[0]
        raw_decon.append(snr_decon)
        raw_sigfit.append(snr_sigfit)

        # Negative cross_validation_max flips the point to the negative side.
        sign = -1 if data.iloc[0]['cross_validation_max'] < 0 else 1
        # log2 (not natural log) so positions agree with the power-of-two
        # tick labels below.
        x = sign * np.log2(snr_decon)
        y = sign * np.log2(snr_sigfit)
        ax.scatter(x, y, c=COLORS[name])
        log_decon.append(x)
        log_sigfit.append(y)

    # A tick at position t is labelled with the SNR it represents, 2**|t|.
    ticks = np.arange(-6, 8, 2)
    tick_labels = [str(2**abs(int(t))) for t in ticks]
    plt.xticks(ticks, tick_labels)
    plt.yticks(ticks, tick_labels)

    # Regression and correlation need at least two points.
    if len(log_decon) >= 2:
        # Fit y (sigfit) on x (deconstructSigs) so the line matches the axes.
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            log_decon, log_sigfit)
        plt.plot(ticks, [slope * x + intercept for x in ticks])
        # Pearson R is computed on the raw, unsigned SNR values.
        R, pval = stats.pearsonr(raw_sigfit, raw_decon)
        plt.text(-4, 3,
                 'R = {}\npval = {}'.format(round(R, 3), round(pval, 3)))

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.ylabel('SNR (Sigfit)')
    plt.xlabel('SNR (Deconstructsig)')

    plt.tight_layout()
Пример #3
0
def compare(cohorts_5mer, cohorts_3mer, cohorts_linker, tumors=None):
    """Plot log2(SNR) per tumour type for three analysis variants.

    The three inputs are loaded through ``increase.load_d``, tagged with a
    ``control`` column (``'mer5'``, ``'mer3'``, ``'linker'``) and
    concatenated. Rows are grouped by ``ttype`` (``name`` mapped through
    ``TTYPES``), in order of first appearance after sorting by ``snr``, and
    each group contributes three points at the same x position.

    Args:
        cohorts_5mer: input forwarded to ``increase.load_d`` (5-mer run).
        cohorts_3mer: input forwarded to ``increase.load_d`` (3-mer run).
        cohorts_linker: input forwarded to ``increase.load_d`` (run shown in
            the legend as "no nucleosomes in context").
        tumors: optional collection of ``name`` values to keep; ``None``
            keeps every row.

    Raises:
        IndexError: if a tumour type lacks a row for one of the variants.
    """
    nuceriod_plt.config_params(14)

    df_5mer = increase.load_d(cohorts_5mer)
    df_5mer['control'] = 'mer5'
    df_3mer = increase.load_d(cohorts_3mer)
    df_3mer['control'] = 'mer3'
    df_linker = increase.load_d(cohorts_linker)
    df_linker['control'] = 'linker'

    df = pd.concat([df_5mer, df_3mer, df_linker])

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']

    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]

    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

    red_patch = mpatches.Patch(color='red', label='3-mers')
    green_patch = mpatches.Patch(color='green',
                                 label='no nucleosomes in context')
    orange_patch = mpatches.Patch(color='orange', label='5-mers')

    # (control tag, marker colour) in drawing order.
    series = (('mer3', 'red'), ('linker', 'green'), ('mer5', 'orange'))

    labels = []
    by_snr = toplot.sort_values(by='snr', ascending=True)
    for ix, (ttype, data) in enumerate(by_snr.groupby(by='ttype',
                                                       sort=False)):
        # Resolve all three values first so a missing variant fails before
        # anything is drawn for this tumour type.
        snrs = [data[data['control'] == control]['snr'].tolist()[0]
                for control, _ in series]
        for (_, color), snr in zip(series, snrs):
            ax.scatter(ix, math.log2(snr), color=color, s=15, alpha=0.8)

        labels.append(ttype)

    # Derive tick positions from the collected labels: the loop index is
    # unbound when there are no groups.
    plt.xticks(list(range(len(labels))), labels, rotation=90)
    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2**t) for t in tick])
    plt.ylabel('log2(SNR)')
    plt.legend(handles=[red_patch, green_patch, orange_patch])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    plt.tight_layout()
Пример #4
0
def rotational(cohorts_high, cohorts_low, tumors=None):
    """Draw paired low/high log2(SNR) bars for each tumour type.

    Both inputs are loaded through ``increase.load_d``, tagged with a
    ``control`` column (``'high'`` / ``'low'``) and concatenated. Rows are
    grouped by ``ttype`` (``name`` mapped through ``TTYPES``) in order of
    first appearance after sorting by ``snr``; groups whose ``ttype`` is
    not a key of ``COLORS`` are skipped. Each kept group adds two adjacent
    bars (low, then high) coloured with ``COLORS[ttype]``.

    Args:
        cohorts_high: input forwarded to ``increase.load_d`` ("high" group).
        cohorts_low: input forwarded to ``increase.load_d`` ("low" group).
        tumors: optional collection of ``name`` values to keep; ``None``
            keeps every row.

    Raises:
        IndexError: if a kept tumour type lacks a row for either group.
    """
    nuceriod_plt.config_params(14)

    df_high = increase.load_d(cohorts_high)
    df_high['control'] = 'high'
    df_low = increase.load_d(cohorts_low)
    df_low['control'] = 'low'

    df = pd.concat([df_high, df_low])

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']

    df['ttype'] = df['name'].map(TTYPES)
    if tumors is not None:
        df = df[df['name'].isin(tumors)]

    toplot = df.sort_values(by='prop_increase_in', ascending=True)

    order = ['low', 'high']
    xvals = []
    yvals = []
    colors = []
    labels = []
    for sig, data in toplot.sort_values(by='snr').groupby(by='ttype',
                                                          sort=False):
        if sig not in COLORS:
            continue
        labels.append('{}'.format(sig))
        for level in order:
            val = data[data['control'] == level]['snr'].tolist()[0]
            yvals.append(math.log2(val))
            # Bars sit at consecutive integer positions.
            xvals.append(len(xvals))
            colors.append(COLORS[sig])

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 1.5))
    # No per-bar ``label`` is passed: there is one label per pair, not per
    # bar, and no legend is drawn.
    ax.bar(xvals, yvals, color=colors)
    ax.set_ylabel('SNR')

    # One tick per pair, centred between its bars. Positions are derived
    # from the number of labels so any number of tumour types works.
    bars_per_group = len(order)
    tick_positions = np.arange((bars_per_group - 1) / 2,
                               bars_per_group * len(labels),
                               bars_per_group)
    plt.xticks(tick_positions, labels, rotation=90, fontsize=13)
    tick = [2, 4, 6, 8]
    plt.yticks(tick, [str(2**t) for t in tick])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
def _load(files):
    """Load results and add increase, display-name and q-value columns.

    Args:
        files: input forwarded to ``increase.load_d``.

    Returns:
        The loaded table with these extra columns: ``increase_in``
        (observed minus expected), ``prop_increase_in`` (increase relative
        to expected), ``outname`` (``name`` with ``'Signature_'`` replaced
        by ``'Sign '``) and ``qvals_snr`` (Benjamini-Hochberg corrected
        ``empirical_pvalue_snr``).
    """
    df = increase.load_d(files)

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['outname'] = df['name'].str.replace('Signature_', 'Sign ')

    # Zero p-values become 0.001 and missing ones become 1. The result is
    # assigned back rather than edited with ``inplace=True`` on the column,
    # which does not update ``df`` under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].replace(0, 0.001).fillna(1))

    qvals = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = qvals[1]

    return df
Пример #6
0
def find(files, min_muts=None):
    """Return, per tumour type, the name of the entry with most mutations.

    The table from ``increase.load_d`` gets a ``ttype`` column (``name``
    mapped through ``TTYPES``) and is ordered by decreasing
    ``nmuts_whole_nucleosome``; the first ``name`` seen for each ``ttype``
    is kept.

    Args:
        files: input forwarded to ``increase.load_d``.
        min_muts: when given, only rows with strictly more than this many
            ``nmuts_whole_nucleosome`` are considered.

    Returns:
        List of selected names, in order of decreasing mutation count.
    """
    table = increase.load_d(files)
    table['ttype'] = table['name'].map(TTYPES)

    if min_muts is not None:
        enough_muts = table['nmuts_whole_nucleosome'] > min_muts
        table = table[enough_muts]

    table.sort_values(by='nmuts_whole_nucleosome', ascending=False,
                      inplace=True)

    seen_ttypes = set()
    selected = []
    for name, ttype in zip(table['name'], table['ttype']):
        if ttype in seen_ttypes:
            continue
        selected.append(name)
        seen_ttypes.add(ttype)
    return selected
Пример #7
0
def _load(cohorts, tumors):
    """Load cohort results for the selected tumours and add q-values.

    Args:
        cohorts: input forwarded to ``increase.load_d``.
        tumors: collection of ``name`` values to keep.

    Returns:
        The rows whose ``name`` is in ``tumors``, sorted by ascending
        ``prop_increase_in``, with these extra columns: ``increase_in``,
        ``prop_increase_in``, ``ttype`` (``name`` mapped through ``TTYPES``)
        and ``qvals_snr`` (Benjamini-Hochberg corrected
        ``empirical_pvalue_snr``).
    """
    df = increase.load_d(cohorts)

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']
    df['ttype'] = df['name'].map(TTYPES)

    df = df[df['name'].isin(tumors)].sort_values(by='prop_increase_in',
                                                 ascending=True)

    # add Q-value
    # Missing p-values become 1 and zeros become 0.001. The result is
    # assigned back rather than edited with ``inplace=True`` on the column,
    # which does not update ``df`` under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].fillna(1).replace(0, 0.001))

    qvals = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = qvals[1]

    return df
Пример #8
0
def _load(files):
    """Load results with > 1000 mutations and add q-values and signed SNR.

    Args:
        files: input forwarded to ``increase.load_d``.

    Returns:
        The rows with ``nmuts_whole_nucleosome`` strictly above 1000, sorted
        by ascending ``prop_increase_in``, with these extra columns:
        ``increase_in``, ``prop_increase_in``, ``qvals_snr``
        (Benjamini-Hochberg corrected ``empirical_pvalue_snr``) and
        ``corrected_snr`` (``_sign_SNR`` applied to each row).
    """
    df = increase.load_d(files)

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']

    df = df.sort_values(by='prop_increase_in', ascending=True)

    # Explicit copy so the column assignments below write to an independent
    # frame rather than a filtered view of the loaded table.
    df = df[df['nmuts_whole_nucleosome'] > 1000].copy()

    # add Q-value
    # Missing p-values become 1 and zeros become 0.001. The result is
    # assigned back rather than edited with ``inplace=True`` on the column,
    # which does not update ``df`` under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].fillna(1).replace(0, 0.001))

    qvals = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = qvals[1]

    df['corrected_snr'] = df.apply(_sign_SNR, axis=1)

    return df
def cohorts(files, colors):
    """Plot per-signature log2(SNR) for every cohort in two mirrored panels.

    ``name`` is expected in the form ``'<ttype>__<signature_name>'``. For
    ``Signature_1`` .. ``Signature_30`` each row is drawn at the
    signature's x position (passed through ``jitter``): rows with positive
    ``cross_validation_max`` go in the upper panel at ``log2(snr)``, rows
    with negative values go in the lower panel at ``-log2(snr)``, and other
    rows are not drawn. Points with q-value < 0.05, SNR > 8 and
    10 < peak < 10.4 are highlighted with the tumour-type colour; all
    others are small grey dots.

    Args:
        files: input forwarded to ``increase.load_d``.
        colors: mapping from tumour type (the part of ``name`` before
            ``'__'``) to a matplotlib colour.
    """
    nuceriod_plt.config_params_full(font_size=7)

    df = increase.load_d(files)

    df['increase_in'] = df['observed_in'] - df['expected_in']
    df['prop_increase_in'] = df['increase_in'] / df['expected_in']

    name_parts = df['name'].str.split('__')
    df['signature_name'] = name_parts.str.get(1)
    df['ttype'] = name_parts.str.get(0)

    # Zero p-values become 0.001 and missing ones become 1. The result is
    # assigned back rather than edited with ``inplace=True`` on the column,
    # which does not update ``df`` under pandas Copy-on-Write.
    df['empirical_pvalue_snr'] = (
        df['empirical_pvalue_snr'].replace(0, 0.001).fillna(1))

    qvals = multipletests(df['empirical_pvalue_snr'].tolist(), method='fdr_bh')
    df['qvals_snr'] = qvals[1]

    fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(7, 2))

    signature_groups = df.groupby('signature_name')

    for i in range(1, 31):
        ix = i - 1
        try:
            data = signature_groups.get_group('Signature_{}'.format(i))
        except KeyError:
            # Signature absent from the data: leave its column empty.
            continue

        for _, row in data.iterrows():
            cv_max = row['cross_validation_max']
            if cv_max > 0:
                ax, sign, grey_size = axs[0], 1, 6
            elif cv_max < 0:
                ax, sign, grey_size = axs[1], -1, 8
            else:
                # Zero or missing value: the row is not plotted.
                continue

            highlighted = (row['qvals_snr'] < 0.05 and row['snr'] > 8
                           and 10 < row['peak'] < 10.4)
            if highlighted:
                ax.scatter(jitter(ix),
                           sign * np.log2(row['snr']),
                           c=colors[row['ttype']],
                           s=25,
                           edgecolor='black',
                           linewidth=0.5)
            else:
                ax.scatter(jitter(ix),
                           sign * np.log2(row['snr']),
                           c='grey',
                           s=grey_size)

    # Tick positions are log2 values; labels show the SNR (2**|position|).
    upper_ticks = list(range(2, 10, 2))
    axs[0].set_yticks(upper_ticks)
    axs[0].set_yticklabels([str(2**abs(t)) for t in upper_ticks])

    lower_ticks = list(range(-8, 0, 2))
    axs[1].set_yticks(lower_ticks)
    axs[1].set_yticklabels([str(2**abs(t)) for t in lower_ticks])

    axs[0].spines['right'].set_visible(False)
    axs[0].spines['top'].set_visible(False)
    axs[0].set_xlim(-1, 30)
    xlabels = list(range(1, 31))
    xpos = list(range(0, 30))

    # Positions and labels are set together by ``plt.xticks`` on the current
    # axes (the last subplot created).
    plt.xticks(xpos, xlabels)
    axs[1].xaxis.set_ticks_position('top')
    axs[1].spines['bottom'].set_visible(False)
    axs[1].spines['right'].set_visible(False)

    axs[0].set_ylim(1.5, 10)
    axs[1].set_ylim(-10, -1.5)
    plt.tight_layout()