예제 #1
0
def run():
    """Load treatment and exposure data, split the samples and plot them.

    Reads the pickled treatment table referenced by
    ``Conf['treatment_FDA_drug']`` plus the SNV and indel exposure tables,
    derives the sample groups with ``select_HRdeficient_samples`` and
    ``select_treated_not_treated`` and passes them to ``do_plot``.
    """
    config_params()
    # Fixed seed so NumPy random draws made downstream are reproducible.
    np.random.seed(12345)

    # NOTE: pickle must only be used on trusted files. The context manager
    # closes the gzip handle as soon as the table has been read.
    with gzip.open(Conf['treatment_FDA_drug']) as handle:
        treated_samples_full = pickle.load(handle)

    base_path = 'data/hartwig/signatures/extraction/results/SignatureAnalyzer/'

    # load SNV exposures of Pan
    exp_snv = load_exposures(
        base_path + 'snvs/exposures/Pan_full/Pan_full.exposures.tsv')

    # load indel exposures of Pan
    exp = load_exposures(base_path + 'indels/exposures/Pan/Pan.exposures.tsv')

    treatment = 'RADIATION'

    # Treatment labels forwarded to select_treated_not_treated as `breaks`.
    breaks = [
        'Unknown', 'Topoisomerase Inhibitor',
        'Anthracycline Topoisomerase Inhibitor', 'Alkylating Drug',
        'Platinum-based Drug', 'Poly(ADP-Ribose) Polymerase Inhibitor',
        'Miscellanious', 'Nucleoside Analog Antiviral', 'TOPOII',
        'Nuclear therapy'
    ]

    samples_braca, samples_lowbraca = select_HRdeficient_samples(exp_snv)
    only_treated, not_breaked = select_treated_not_treated(
        treated_samples_full, treatment, breaks, exp)

    do_plot(samples_lowbraca, samples_braca, not_breaked, only_treated, exp)
def do_plot(exposures_pan, sig, MMR_notaffected_not_tzm_variants,
            MMR_affected_not_tzm_variants, MMR_notaffected_tzm_variants,
            MMR_affected_tzm_variants):
    """Strip plot of one signature's exposure in four sample groups.

    Args:
        exposures_pan: table for which ``exposures_pan[samples].loc[sig]``
            returns the values of row ``sig`` for the given sample columns.
        sig: row label of the signature to plot.
        MMR_notaffected_not_tzm_variants: samples of the first group
            (untreated, MMR not affected).
        MMR_affected_not_tzm_variants: samples of the second group
            (untreated, MMR affected).
        MMR_notaffected_tzm_variants: samples of the third group
            (treated, MMR not affected).
        MMR_affected_tzm_variants: samples of the fourth group
            (treated, MMR affected).

    The figure is written to ``figures/TZM_treated.svg`` and closed.
    """
    config_params(6.5)

    fig, ax = plt.subplots(1, 1, figsize=(1.25, 1.5))
    sns.stripplot(data=[
        exposures_pan[MMR_notaffected_not_tzm_variants].loc[sig],
        exposures_pan[MMR_affected_not_tzm_variants].loc[sig],
        exposures_pan[MMR_notaffected_tzm_variants].loc[sig].tolist(),
        exposures_pan[MMR_affected_tzm_variants].loc[sig].tolist(),
    ],
                  jitter=0.3,
                  s=2,
                  lw=0.5,
                  color='#800080ff')

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Tick labels follow the order of the four data vectors above. The first
    # group is the MMR-not-affected one, so it is labelled "no MMR-def"
    # (it used to repeat the "MMR-def" label of the second group).
    plt.xticks([0, 1, 2, 3], [
        'TZM untreated, no MMR-def (n={})'.format(
            len(MMR_notaffected_not_tzm_variants)),
        'TZM untreated, MMR-def (n={})'.format(
            len(MMR_affected_not_tzm_variants)),
        'TZM treated, no MMR-def (n={})'.format(
            len(MMR_notaffected_tzm_variants)),
        'TZM treated, MMR-def  (n={})'.format(len(MMR_affected_tzm_variants)),
    ],
               rotation=90)

    plt.ylabel('TMZ related SBS')
    plt.savefig('figures/TZM_treated.svg')
    plt.close()
예제 #3
0
def specific_treatments_bar(drug):
    """Draw one stacked bar with the number of treated samples per treatment.

    For every key found under ``typeDrug[drug]`` the length of
    ``samples_tract_specific_FDA[key]['Pan']['YES']`` is used as that
    treatment's count. Counts are stacked from the largest (bottom) to the
    smallest (top) and each segment is annotated with its count.

    Args:
        drug: key into the pickled ``typeDrug`` table; also used, lower-cased
            and with spaces replaced by underscores, in the output file names.

    The figure is saved as ``figures/2A_<drug>_barplot.svg`` and ``.png``.
    """
    config_params(5)

    # NOTE: pickle must only be used on trusted files. Context managers make
    # sure both gzip handles are closed after loading.
    with gzip.open('data/clinical_data/hartwig_typeDrug.pckl.gz') as handle:
        typeDrug = pickle.load(handle)
    with gzip.open(Conf['treatment_specific_drug']) as handle:
        samples_tract_specific_FDA = pickle.load(handle)

    dplot_treat = {
        k: len(samples_tract_specific_FDA[k]['Pan']['YES'])
        for k in typeDrug[drug]
    }
    sorted_keys = sorted(dplot_treat, key=dplot_treat.get, reverse=True)

    fig, ax = plt.subplots(1, 1, figsize=(0.3, 1))
    bottom = 0
    for treat in sorted_keys:
        count = dplot_treat[treat]
        ax.bar(0, count, width=1, bottom=bottom)
        # Label placed 150 data units below the top edge of the segment.
        ax.text(-0.1, count + bottom - 150, str(count))
        bottom += count

    plt.xticks([0], fontsize=5)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    file_stem = 'figures/2A_{}_barplot'.format(drug.lower().replace(' ', '_'))
    plt.savefig(file_stem + '.svg')
    plt.savefig(file_stem + '.png', dpi=600)

    plt.close()
예제 #4
0
def plot_distribution_tissue_origin():
    """Horizontal bar chart of the number of samples per primary tumour site.

    Reads the tab-separated metadata file at ``Conf['path_metadata']``,
    normalises the ``primaryTumorLocation`` column into
    ``primaryTumorLocation_fixed`` and draws one bar for every location that
    has more than 20 samples and a colour in ``return_colors()``. The figure
    is saved as ``figures/1B.svg`` / ``figures/1B.png`` and then shown.
    """
    metadata_path = Conf['path_metadata']
    palette = return_colors()

    # read Hartwig metadata
    pat = pd.read_csv(metadata_path, sep='\t')

    pat['primaryTumorLocation'] = pat['primaryTumorLocation'].replace(
        'Bone/soft tissue', 'Bone/Soft tissue')

    # Normalise separators first, then apply exact-value renames in order.
    fixed = pat['primaryTumorLocation'].apply(
        lambda value: str(value).replace(' ', '-').replace('/', '-'))
    renames = (
        ('Head-and-Neck', 'Head-and-neck'),
        ('nan', 'Unknown'),
        ('CUP', 'Unknown'),
        ('Net', 'NET'),
    )
    for old_name, new_name in renames:
        fixed = fixed.replace(old_name, new_name)
    pat['primaryTumorLocation_fixed'] = fixed

    counts = pat['primaryTumorLocation_fixed'].value_counts().to_dict()
    ranked = sorted(counts, key=counts.get, reverse=True)

    config_params(font_size=5.5)
    fig, ax = plt.subplots(1, 1, figsize=(2, 2.35))

    labels = []
    row = 0
    # Walk from the least to the most frequent location so the largest bar
    # ends up at the top of the axis.
    for location in reversed(ranked):
        n_samples = counts[location]
        if n_samples <= 20:
            continue
        if location not in palette:
            continue
        row += 1
        ax.barh(row, n_samples, color=palette[location])
        ax.text(n_samples + 40,
                row - 0.1,
                n_samples,
                verticalalignment='center')
        labels.append(location)

    plt.yticks(list(range(1, len(labels) + 1)), labels, rotation=0)

    for side in ('right', 'bottom'):
        ax.spines[side].set_visible(False)

    plt.xlabel('Number of samples')
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')
    plt.tight_layout()
    ax.xaxis.set_tick_params(width=0.45)
    ax.yaxis.set_tick_params(width=0.45)
    plt.savefig('figures/1B.svg')
    plt.savefig('figures/1B.png', dpi=600)

    plt.show()
def plot_scatter_bars(expos, sig):
    """Plot 5-FU vs capecitabine exposures for Colon-Rectum and Breast.

    For each of the two tumour types two figures are produced:
    ``figures/sigmoid_treat_<ttype>.svg`` with the sorted values of both
    treatment groups on a log y axis, and ``figures/bar_treat_<ttype>.svg``
    with the ratio ``count_exposed / count_total`` of each group.

    Args:
        expos: forwarded unchanged to ``get_items_to_plot``.
        sig: forwarded unchanged to ``get_items_to_plot``.
    """
    non_treated, samples_FU, samples_capecitabine = get_treated_not_treated()
    fu_vals, fu_colors, fu_exposed, fu_total = get_items_to_plot(
        samples_FU, expos, sig)
    cape_vals, cape_colors, cape_exposed, cape_total = get_items_to_plot(
        samples_capecitabine, expos, sig)

    # (values, colours) per treatment; the position in the tuple is also the
    # second argument given to get_range_nums.
    scatter_layers = ((fu_vals, fu_colors), (cape_vals, cape_colors))
    bar_layers = ((fu_exposed, fu_total), (cape_exposed, cape_total))

    for ttype in ('Colon-Rectum', 'Breast'):
        config_params(font_size=7)

        fig, ax = plt.subplots(1, 1, figsize=(0.5, 1.25))
        plt.yscale('log')
        ax.set_ylabel('SBS Capecitabine/5-FU')

        for slot, (values, shades) in enumerate(scatter_layers):
            ax.scatter(get_range_nums(values[ttype], slot),
                       sorted(values[ttype]),
                       color=shades[ttype],
                       s=1,
                       alpha=0.75)

        print(len(fu_vals[ttype]), len(cape_vals[ttype]), ttype)
        plt.xticks([0.5, 1.5], ['5-FU', 'Capecitabine'], rotation=90)
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        plt.ylim(0, 10000)
        plt.savefig('figures/sigmoid_treat_{}.svg'.format(ttype))
        plt.show()

        # Second figure: fraction of samples with activity per treatment.
        fig, ax = plt.subplots(1, 1, figsize=(0.5, 0.4))
        for slot, (exposed, total) in enumerate(bar_layers):
            ax.bar(slot, exposed[ttype] / total[ttype], color='#2c89a0ff')
        plt.ylim(0, 1)
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        ax.set_ylabel('Proportion  of samples\nwith activity')
        plt.savefig('figures/bar_treat_{}.svg'.format(ttype))
        plt.show()
예제 #6
0
def do_volcano(significant_vetted, nonsignificant, significant_not_vetted,
               method_extraction):
    """Draw a volcano plot (effect size vs. -log10 p-value).

    Each of the three tables is iterated row by row; every row must provide
    ``effect_size``, ``logp`` and ``format_type`` (used as marker). Rows of
    ``significant_not_vetted`` additionally provide ``color`` and are drawn
    larger with a black edge.

    Args:
        significant_vetted: rows drawn as small '#99d8c9' points.
        nonsignificant: rows drawn as small grey points.
        significant_not_vetted: rows drawn as large outlined points.
        method_extraction: sub-directory of ``figures/`` receiving
            ``volcano.svg`` and ``volcano.png``.
    """
    config_params()
    fig, ax = plt.subplots(1, 1, figsize=(4.2, 4))
    small = 3

    # Dashed guide lines: p = 0.001 horizontally, x = 2 vertically.
    plt.hlines(-np.log10(0.001),
               0,
               10,
               linestyles="dashed",
               color='red',
               alpha=0.3)
    plt.vlines(2, 0, 7, linestyles="dashed", color='red', alpha=0.3)

    background = ((significant_vetted, '#99d8c9'), (nonsignificant, 'grey'))
    for table, shade in background:
        for _, point in table.iterrows():
            plt.scatter(point['effect_size'],
                        point['logp'],
                        c=shade,
                        s=small,
                        marker=point['format_type'])

    for _, point in significant_not_vetted.iterrows():
        plt.scatter(point['effect_size'],
                    point['logp'],
                    c=point['color'],
                    s=25,
                    edgecolor='black',
                    linewidths=0.7,
                    marker=point['format_type'])

    plt.xlim(0, 16)
    plt.ylim(0, 3.1)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    plt.ylabel('statistical significance\n(-log10 pvalue)')
    plt.xlabel('treated-untreated fold change')
    plt.tight_layout()

    target = 'figures/{}/volcano'.format(method_extraction)
    plt.savefig(target + '.svg')
    plt.savefig(target + '.png', dpi=600)

    plt.show()
예제 #7
0
def do_plot(d_box, dttype, outfile, signature_2_treatment):
    """Box plot with jittered, colour-coded points per signature.

    Args:
        d_box: mapping signature -> values; signatures are ordered by the
            median of their values and kept only when they have more than
            five values and appear in ``signature_2_treatment``.
        dttype: mapping signature -> {tumour type -> values}; every value is
            drawn as a dot coloured by ``return_colors()[tumour type]``.
        outfile: path passed to ``plt.savefig``.
        signature_2_treatment: container used for the membership filter.
    """
    np.random.seed(12345)

    config_params(6)

    ranked = sorted(d_box, key=lambda name: np.median(d_box[name]),
                    reverse=False)
    labels = [
        name for name in ranked
        if len(d_box[name]) > 5 and name in signature_2_treatment
    ]
    toplot = [sorted(d_box[name]) for name in labels]

    fig, ax = plt.subplots(1, 1, figsize=(3, 3))

    plt.yscale('log')
    color_ttype = return_colors()

    number_of_samples = []
    for position, signature in enumerate(labels):
        dots = []
        dot_colors = []
        for ttype, values in dttype[signature].items():
            dots.extend(values)
            dot_colors.extend(color_ttype[ttype] for _ in values)
        # One independent jitter draw per dot, centred on the box position.
        xs = [position + np.random.uniform(-0.2, 0.2, 1)[0] for _ in dots]
        ax.scatter(xs, dots, color=dot_colors, s=1, alpha=0.75)
        number_of_samples.append(len(dots))

    box = sns.boxplot(data=toplot, color='black', linewidth=0.6, ax=ax,
                      showfliers=False)
    # NOTE(review): whether `box.artists` holds the box patches depends on
    # the matplotlib/seaborn version in use - confirm.
    for patch in box.artists:
        patch.set_facecolor('#d1d1d1ff')

    plt.ylabel('foldchange late/early')
    tick_text = [
        '{}\n{}'.format(name, count)
        for name, count in zip(labels, number_of_samples)
    ]
    ax.set_xticklabels(tick_text, rotation=90)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylim(0.05, 25)

    # Reference line at fold change 1.
    ax.hlines(1, 0, len(toplot), alpha=0.5)

    plt.tight_layout()
    plt.savefig(outfile)
예제 #8
0
def do_plot(list_val, norm=False):
    """Plot the sorted values of one cohort as a narrow scatter column.

    The values are spread evenly over x in [0, 1), drawn in ascending order
    and overlaid with a horizontal line at their median.

    Args:
        list_val: sequence of numeric values to plot.
        norm: when exactly ``False`` the y axis is logarithmic with limits
            (10, 200000) and the x label shows the cohort size; when exactly
            ``True`` the y limits are (0, 0.8). The flag also selects the
            output name: ``figures/stjude_full.svg`` or
            ``figures/stjude_norm.svg``.
    """
    fig, axs = plt.subplots(1, 1, figsize=(0.1, 1.5))
    config_params()

    if norm is False:
        axs.set_xlabel('{} ({})'.format('Pediatric', len(list_val)),
                       rotation=90)
        axs.set_ylim(10, 200000)
        axs.set_yscale('log')

    n_values = len(list_val)
    range_nums = [num / n_values for num in range(n_values)]
    x_mid = np.median(range_nums)

    axs.hlines(np.median(list_val),
               x_mid - 0.25,
               x_mid + 0.45,
               color='grey',
               alpha=0.4)
    axs.scatter(range_nums,
                sorted(list_val),
                s=2,
                linewidth=0.2,
                color='#006400ff')

    axs.set_xlim(-0.2, 1.1)
    axs.spines['bottom'].set_visible(False)
    axs.spines['right'].set_visible(False)
    axs.set_xticklabels([])

    # Only one panel is ever drawn, so the y label is always shown (the old
    # "hide the left spine for later panels" branch could never run).
    axs.set_ylabel('Number of SBS\nassociated to treatments')

    lab = 'full'
    if norm is True:
        axs.set_ylim(0, 0.8)
        lab = 'norm'

    axs.xaxis.set_label_position('top')
    axs.xaxis.set_ticks_position('none')
    axs.yaxis.set_ticks_position('none')

    plt.savefig('figures/stjude_{}.svg'.format(lab))
예제 #9
0
def analysis_two_timepoints(drug, signature_dic, exposures_path):
    """Plot drug-related exposure before and after treatment per patient.

    For every pre/post biopsy pair whose two samples are both present in the
    exposure table, a dashed line joins the summed exposure of
    ``signature_dic[drug]`` at day 0 and at the second biopsy; the stretch of
    that line covering the treatment window is redrawn as a thick dark-red
    segment and the second biopsy is marked with a dot.

    Args:
        drug: key into ``signature_dic``; also used in the output file name
            ``figures/<drug>.pre_post.svg``.
        signature_dic: mapping drug -> exposure columns to sum.
        exposures_path: path handed to ``read_exposures``.
    """
    config_params()

    lower_drug = drug.lower().capitalize()
    df_treat, dup = get_patients_two_points()
    (keep_patients, stringent, samples_not_treated, samples_treated,
     space_between_biopsies, time_treated,
     start_treatment) = get_pre_post(lower_drug, df_treat, dup)
    df_exp, samples_exposed = read_exposures(exposures_path)

    fig, ax = plt.subplots(1, 1, figsize=(2, 2))
    plt.ylabel('{}-related mutations'.format(lower_drug.capitalize()))
    plt.xlabel('Days')

    for ix in range(len(samples_not_treated)):
        pre_sample = samples_not_treated[ix]
        post_sample = samples_treated[ix]
        pre_known = pre_sample in samples_exposed
        post_known = post_sample in samples_exposed
        if not (pre_known and post_known):
            continue

        columns = signature_dic[drug]
        exposure_pre = df_exp.loc[pre_sample][columns].sum()
        exposure_post = df_exp.loc[post_sample][columns].sum()
        gap = space_between_biopsies[ix]

        # Straight line between the two biopsies.
        plt.plot([0, gap], [exposure_pre, exposure_post],
                 color='#d95f0e',
                 ls='--')

        # Same line restricted to the treatment window.
        slope = (exposure_post - exposure_pre) / gap
        window = (start_treatment[ix], start_treatment[ix] + time_treated[ix])
        plt.plot(list(window),
                 [exposure_pre + slope * day for day in window],
                 color='darkred',
                 lw=2)
        plt.scatter(gap, exposure_post)

    plt.savefig('figures/{}.pre_post.svg'.format(drug))
    plt.close()
예제 #10
0
def do_plot(type_extraction, axis_replication, axis_transcription,
            selected_sigs, wanted_sigs):
    """Scatter replication against transcription strand asymmetry.

    All points are drawn small and grey; the signatures listed in
    ``wanted_sigs`` are printed and drawn on top as larger orange points.

    Args:
        type_extraction: sub-directory of ``figures/`` for the output file
            ``repli_trans_assym.svg``.
        axis_replication: x values of the background points.
        axis_transcription: y values of the background points.
        selected_sigs: mapping signature -> (x, y) pair.
        wanted_sigs: signatures to highlight.
    """
    config_params(6.5)
    fig, ax = plt.subplots(1, 1, figsize=(1.85, 2.1))
    plt.scatter(axis_replication, axis_transcription, s=1, c='grey')

    for name in wanted_sigs:
        coords = selected_sigs[name]
        print(name, coords)
        repl, trans = coords
        plt.scatter(repl, trans, s=10, c='#ff6600ff')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    plt.ylabel('Transcription strand asymmetry')
    plt.xlabel('Replication strand asymmetry')

    plt.savefig('figures/{}/repli_trans_assym.svg'.format(type_extraction))
    plt.show()
예제 #11
0
def plot_bias_indel(d_clustered, d_unclustered, outpath, ttype, label1,
                    label2):
    """Plot, for every signature column, its profile under two conditions.

    For each column ``s`` of ``d_clustered`` two figures are written below
    ``<outpath>/processes/<ttype>/``:

    * ``<ttype>.<s>.png`` / ``.svg`` with four stacked panels:
      0. both conditions interleaved, channel by channel;
      1. the per-channel difference ``label1 - label2``;
      2. the per-channel sum of both conditions after normalising the
         interleaved vector to 1;
      3. per-group totals of each condition, marked with ``*`` when
         ``poisson_exact`` returns a p-value below 0.05.
    * ``<ttype>.<s>.diagonal.png`` / ``.svg``: scatter of ``d_clustered[s]``
      against ``d_unclustered[s]`` with the identity line.

    Args:
        d_clustered: table whose columns are signatures; values of the first
            condition (labelled ``label1``).
        d_unclustered: table with the same columns for the second condition
            (labelled ``label2``).
        outpath: output root directory.
        ttype: name used for the sub-directory and file prefix.
        label1: label of the first condition.
        label2: label of the second condition.
    """
    config_params(3)

    order_plot = order_to_plot_indel()

    # Tick labels of panel 0: every channel once per condition.
    final_order = []
    for o in order_plot:
        final_order.append('{}_{}'.format(o, label1))
        final_order.append('{}_{}'.format(o, label2))

    # One colour per group of channels and the number of channels per group.
    colors_individual = [
        '#fdbe6f', '#ff8002', '#b0dd8b', '#36a12e', '#fdcab5', '#fc8a6a',
        '#f14432', '#bc191a', '#d0e1f2', '#94c4df', '#4a98c9', '#1764ab',
        '#e1e1ef', '#b6b6d8', '#8683bd', '#62409b'
    ]
    sizes = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 2, 3, 5]

    # Per-channel colours (83 entries): each group colour repeated by size.
    colors_ind = [
        color for color, size in zip(colors_individual, sizes)
        for _ in range(size)
    ]
    # Panel 0 draws two bars per channel, so every colour appears twice.
    colors = [color for color in colors_ind for _ in range(2)]

    for s in d_clustered.columns:

        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Interleave both conditions: [ch0_label1, ch0_label2, ch1_label1...]
        sig = []
        for first, second in zip(d_clustered[s], d_unclustered[s]):
            sig.append(first)
            sig.append(second)

        # Panel 0: interleaved raw values.
        x = list(range(len(colors)))
        axs[0].bar(x, sig, color=colors, width=0.8, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)
        axs[0].set_xlim(-1, 172)

        # Panel 3: totals per group and condition, with significance marks.
        toplot_full = []
        colors_joinbar = []
        marks = []
        start = 0
        for group_color, len_size in zip(colors_individual, sizes):
            c = sig[start:start + len_size * 2]
            start += len_size * 2
            c_condition1 = np.sum(c[::2])
            c_condition2 = np.sum(c[1::2])
            colors_joinbar.extend([group_color, group_color])
            toplot_full.extend([c_condition1, c_condition2])

            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.extend([mark, mark])

        xjoin = list(range(len(toplot_full)))
        axs[3].bar(xjoin,
                   toplot_full,
                   color=colors_joinbar,
                   width=0.5,
                   linewidth=0)
        axs[3].set_xticks(xjoin)
        for pos, mark, val in zip(xjoin, marks, toplot_full):
            axs[3].text(pos, val + 10, mark)

        # Panel 1: per-channel difference between the two conditions.
        x = list(range(len(colors_ind)))
        vector = [first - second for first, second in zip(sig[0::2], sig[1::2])]
        axs[1].bar(x, vector, color=colors_ind, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 86)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: per-channel share after normalising the vector to sum 1.
        sig = np.asarray(sig) / np.sum(sig)
        vector = [first + second for first, second in zip(sig[0::2], sig[1::2])]
        axs[2].bar(x, vector, color=colors_ind, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 86)
        axs[2].spines['top'].set_visible(False)

        axs[0].set_ylabel('NMF counts')
        axs[1].set_ylabel('{} - {}'.format(label1, label2))
        axs[2].set_ylabel('Relative Probability')

        # Shared cosmetics. Each panel styles its OWN tick lines (panel 3
        # previously re-styled the y tick lines of panel 2 by mistake).
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')
            panel.xaxis.set_ticks_position('none')
            for axis in ['top', 'bottom', 'left', 'right']:
                panel.spines[axis].set_linewidth(0.2)
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()

        plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                       s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                       s))

        plt.close()

        # Second figure: condition 1 against condition 2 with identity line.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        total_max = np.max([np.max(d_clustered[s]), np.max(d_unclustered[s])])

        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.scatter(d_clustered[s], d_unclustered[s], s=5, c=colors_ind)
        plt.xlim(0, np.max(d_clustered[s]) + 10)
        plt.ylim(0, np.max(d_unclustered[s]) + 10)

        plt.xlabel(label1)
        plt.ylabel(label2)

        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, s))

        plt.close()
예제 #12
0
def sankey_plot_main():
    """Render the treatment -> tumour-type Sankey diagram (figure 2A).

    Builds a (target, source, value) table in which ``target`` is a tumour
    type, ``source`` a treatment column of the matrix returned by
    ``create_matrix_treatments_plot`` and ``value`` the column sum over that
    tumour type's samples. Rows targeting 'Unknown' are dropped, and only
    sources whose total exceeds 30 are kept. The diagram is saved as
    ``figures/2A.svg`` and ``figures/2A.png``.
    """
    config_params(font_size=4)

    hv.extension('matplotlib')
    hv.output(fig='svg')

    excluded = ['RADIATION', 'Miscellanious', 'Unknown', 'TopoII', 'TOPOII']
    out, dic_t = create_matrix_treatments_plot()
    order_ttypes = [
        'Breast',
        'Colon-Rectum',
        'Prostate',
        'Lung',
        'Skin',
        'Bone-Soft-tissue',
        'Ovary',
        'Esophagus',
        'Urinary-tract',
        'NET',
        'Kidney',
        'Nervous-system',
        'Biliary',
        'Pancreas',
        'Unknown',
        'Uterus',
        'Head-and-neck',
        'Liver',
        'Stomach',
        'Mesothelioma',
    ]

    # One row per (tumour type, treatment) with the summed column value.
    all_rows = []
    for ttype in order_ttypes:
        subs = out.loc[dic_t[ttype]]
        all_rows.extend((ttype, col, int(subs[col].sum()))
                        for col in subs
                        if col not in excluded)

    matrix_df = pd.DataFrame(all_rows)
    matrix_df.columns = ['target', 'source', 'value']
    matrix_df = matrix_df[matrix_df['target'] != 'Unknown'].fillna(0)
    matrix_df['value'] = matrix_df['value'].astype(int)

    # Keep only sufficiently frequent, known treatments.
    totals = matrix_df.groupby(by='source')['value'].sum()
    good_source = {
        source for source, tot in totals.items()
        if tot > 30 and source != 'Unknown'
    }
    matrix_df = matrix_df[matrix_df['source'].isin(good_source)]

    ordered = matrix_df.sort_values(by='source', ascending=True)
    diagram = hv.Sankey(ordered, label='').opts(label_position='left',
                                                edge_color='target',
                                                node_color='index',
                                                cmap='Set1')

    fig = hv.render(diagram)
    fig.set_figwidth(10)
    fig.savefig('figures/2A.svg')
    fig.savefig('figures/2A.png', dpi=600)
예제 #13
0
def plot_heatmap_treatment(file):
    """Draw a sample-by-treatment heatmap grouped by tumour type.

    Args:
        file: path to a gzip-compressed pickle holding, per treatment, a
            mapping tumour type -> {'YES': samples, 'NO': samples}. The part
            of its base name before the first dot names the output
            ``figures/EDF1_<name>.png``; when it contains 'specific' the
            treatment labels are lower-cased.

    Samples treated with at least one treatment are kept. Within each tumour
    type (most frequent first, 'Double-primary' excluded) they are ordered by
    the column dendrogram returned by
    ``classic_mutual_exclusivity_visualization`` on the 30 most frequent
    treatments; the heatmap shows the 20 most frequent treatments, with a
    coloured side bar giving the size of every tumour-type block.
    """
    outf = os.path.basename(file).split('.')[0]
    dic_primary_full, _ = return_metadata()
    color_ttype = return_colors()

    # Tumour types ordered by number of samples, largest first.
    total_count = defaultdict(int)
    for t in dic_primary_full.values():
        total_count[t] += 1
    sorted_ttyps = sorted(total_count, key=total_count.get, reverse=True)

    # NOTE: pickle must only be used on trusted files. The context manager
    # closes the gzip handle once the table is loaded.
    with gzip.open(file) as handle:
        treated = pickle.load(handle)

    forbidden = ['RADIATION', 'TOPOII']

    # sample -> treatment -> 1 (treated) / 0 (not treated); samples listed in
    # neither group get no entry and become NaN in the data frame.
    matrix_treated = defaultdict(lambda: defaultdict(int))
    for sample, t in dic_primary_full.items():
        for k, d in treated.items():
            if k in forbidden:
                continue
            if sample in d[t]['YES']:
                matrix_treated[sample][k] = 1
            elif sample in d[t]['NO']:
                matrix_treated[sample][k] = 0

    # Number of treated samples per treatment, used to rank the treatments.
    d_treatments = defaultdict(int)
    for k, d in treated.items():
        if k not in forbidden:
            for ttype, l in d.items():
                d_treatments[k] += len(l['YES'])

    sorted_treatments = sorted(d_treatments,
                               key=d_treatments.get,
                               reverse=True)

    out = pd.DataFrame.from_dict(matrix_treated, orient='index')
    # Sum the 0/1 columns BEFORE the string column 'TTYPE' is added: summing
    # a frame that mixes numbers and strings raises on recent pandas.
    treatments_per_sample = out.sum(axis=1)
    out['TTYPE'] = [dic_primary_full[t] for t in out.index.tolist()]
    out = out[treatments_per_sample > 0]

    forbidden = ['Double-primary']
    order_sample_plot = []
    order = []
    dic_len = defaultdict(int)
    for ttype in tqdm(sorted_ttyps):
        if ttype in forbidden:
            continue
        subs = out[out['TTYPE'] == ttype]
        mat = subs.drop('TTYPE', axis=1).dropna()[sorted_treatments[:30]]
        if len(mat) > 1:
            n = classic_mutual_exclusivity_visualization(
                mat, sorted_treatments[:30])
            # NOTE(review): the column dendrogram order is used to index the
            # rows (samples) of `mat`; presumably the helper clusters a
            # transposed matrix - confirm.
            new_order = n.dendrogram_col.reordered_ind
            sample_list = mat.reset_index().loc[new_order]['index'].tolist()
            order_sample_plot.extend(sample_list)
            order.append(ttype)
            dic_len[ttype] = len(sample_list)

    new_cmap = LinearSegmentedColormap.from_list(
        "", ["lightgrey", "grey", "darkred"])
    concat = out.loc[order_sample_plot].drop('TTYPE', axis=1)
    concat = concat[sorted_treatments[:20]]

    if 'specific' in outf:
        concat.columns = [s.lower() for s in concat.columns]

    config_params(2)
    fig, ax = plt.subplots(1,
                           2,
                           figsize=(1, 3),
                           gridspec_kw={'width_ratios': [1, 27]})
    sns.heatmap(concat,
                cmap=new_cmap,
                yticklabels=False,
                ax=ax[1],
                cbar=False)
    ax[1].xaxis.set_ticks_position('top')

    # Side bar: blocks are stacked bottom-up in reverse order so the first
    # tumour type sits at the top, next to the first heatmap rows.
    bot = 0
    for t in order[::-1]:
        ax[0].bar(0, dic_len[t], bottom=bot, color=color_ttype[t])
        bot += dic_len[t]
    ax[0].set_ylim(0, bot)
    for side in ('top', 'bottom', 'left', 'right'):
        ax[0].spines[side].set_visible(False)
    ax[0].get_yaxis().set_visible(False)
    ax[0].get_xaxis().set_visible(False)

    plt.xticks(rotation=90)
    plt.savefig('figures/EDF1_{}.png'.format(outf), dpi=600)
    plt.close()
예제 #14
0
def do_plot(samples_lowbraca, samples_braca, not_breaked, only_treated, exp):
    """Compare an indel signature between radiated and non-radiated samples.

    The exposure of signature ``'12_ID6_0.962941_1'`` is compared across four
    groups: ``not_breaked`` / ``only_treated`` samples, each split into those
    found in ``samples_lowbraca`` and those found in ``samples_braca``.

    Two figures are written: ``figures/radiation.svg`` (box plot per group
    with jittered dots coloured by tumour type) and
    ``figures/radiation_pvals.svg`` (text with three Mann-Whitney U
    p-values, which are also printed).

    Args:
        samples_lowbraca: container of samples labelled "no BRCAness".
        samples_braca: container of samples labelled "BRCAness".
        not_breaked: samples labelled "Not radiated".
        only_treated: samples labelled "Radiated".
        exp: exposure table with samples as rows and signatures as columns.

    NOTE: the function ends with ``sys.exit()`` and therefore never returns.
    """
    dic_primary_full, _ = return_metadata()
    color_ttype = return_colors()
    fig, ax = plt.subplots(1, 1, figsize=(2, 1.5))
    config_params(6.5)

    sigs = ['12_ID6_0.962941_1']
    # Computed once; it used to be recomputed for every group and every dot.
    total_exposure = exp[sigs].sum(axis=1)

    # (tick label template, samples) in plotting order, positions 0..3.
    groups = [
        ('Not radiated no BRCAness ({})',
         [i for i in not_breaked if i in samples_lowbraca]),
        ('Radiated no BRCAness ({})',
         [i for i in only_treated if i in samples_lowbraca]),
        ('Not radiated BRCAness ({})',
         [i for i in not_breaked if i in samples_braca]),
        ('Radiated BRCAness ({})',
         [i for i in only_treated if i in samples_braca]),
    ]
    exposures = [
        total_exposure.loc[members].dropna() for _, members in groups
    ]

    sns.boxplot(data=exposures,
                linewidth=0.6,
                showfliers=False,
                color='#cbcacbff')

    plt.ylabel('Indels DSB repair by\nnon-homologous end-joining')
    plt.xticks([0, 1, 2, 3], [
        template.format(len(values))
        for (template, _), values in zip(groups, exposures)
    ],
               rotation=90)

    # Overlay every sample as a jittered dot coloured by its tumour type.
    for position, (_, members) in enumerate(groups):
        plotdot = [total_exposure.loc[sample] for sample in members]
        colors = [color_ttype[dic_primary_full[sample]] for sample in members]
        xs = [
            position + np.random.uniform(-0.2, 0.2, 1)[0] for _ in plotdot
        ]
        ax.scatter(xs, plotdot, color=colors, s=1, alpha=0.2)

    plt.ylim(0, 700)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.savefig('figures/radiation.svg')
    plt.show()

    ##################
    # Separate figure that only carries the p-values as text.

    fig, ax = plt.subplots(
        1,
        1,
    )
    not_rad_low, rad_low, not_rad_brca, rad_brca = exposures

    _, pval1 = mannwhitneyu(not_rad_low, rad_low)
    print("Not radiated no BRCAnes vs Radiated no BRCAness", pval1)

    _, pval2 = mannwhitneyu(not_rad_brca, rad_brca)
    print("Not radiated  BRCAnes vs Radiated  BRCAness", pval2)

    _, pval3 = mannwhitneyu(rad_low, rad_brca)
    print("radiated  no BRCAnes vs Radiated  BRCAness", pval3)

    # Raw strings: "\i" is not a valid escape sequence in a normal literal.
    for height, pval in ((1, pval1), (4, pval2), (8, pval3)):
        ax.text(1,
                height,
                r"$\it{P}$" + " = {}".format(sci_notation(pval)),
                fontsize=7)

    plt.xlim(0, 5)
    plt.ylim(0, 10)
    plt.savefig('figures/radiation_pvals.svg')
    # Kept from the original script: stop the interpreter after plotting.
    sys.exit()
예제 #15
0
                             outpath, 'count')
    plot_single_correlation(dsigprof_simple, dsiganalyzer_simple, drug,
                            outpath, 'count')
    plot_single_distribution(dsigprof_simple_exposure,
                             dsiganalyzer_simple_exposure, drug, outpath,
                             'exposure')
    plot_single_correlation(dsigprof_simple_exposure,
                            dsiganalyzer_simple_exposure, drug, outpath,
                            'exposure')

    bad_samples_drug = get_samples_similar_exposure(
        dsigprof_simple_exposure, dsiganalyzer_simple_exposure, drug, outpath,
        'exposure')


# Module-level call: runs at import time, before any function below is used.
# NOTE(review): other call sites pass `font_size=` by keyword, so 5 is
# presumably the font size - confirm against config_params.
config_params(5)


def run():

    dic_primary_full, _ = return_metadata()
    signature_2_treatment_signature_analyzer = {
        'CISPLATIN': ['21_SBS31_0.953955_1', '14_1'],
        'CARBOPLATIN': ['21_SBS31_0.953955_1', '25_1'],
        '5-FU_CAPE': ['31_SBS17b_0.968799_1'],
        'OXALIPLATIN': ['14_1', '37_1']
    }
    signature_2_treatment_sigprofiler = {
        'CISPLATIN': ['1_SBS31_0.968153_0.98'],
        'CARBOPLATIN': ['1_SBS31_0.968153_0.98'],
        'OXALIPLATIN': ['20_0.92'],
def plot_piecharts_signatures(exposures_path, type_mut, type_extraction,
                              figsize, min_val):
    """Plot a signature x tumor-type dot matrix and save it as SVG and PNG.

    For every (signature, tumor type) pair one dot is drawn whose color
    encodes the median exposure of the samples above ``min_val`` and whose
    area encodes the fraction of samples above ``min_val``.

    Args:
        exposures_path: Passed to ``read_exposures``; the returned table is
            used with samples on the index and signatures as columns.
        type_mut: ``"SBS"``, ``"ID"`` or ``"DBS"``; selects the color bins
            and is the substring that marks a signature as matching a known
            reference.
        type_extraction: Sub-directory of ``figures/`` the plots go into.
        figsize: Figure size forwarded to ``plt.subplots``.
        min_val: Exposure threshold above which a sample counts as carrying
            the signature.

    Signature column names containing ``type_mut`` are parsed as
    ``<id>_<type_mut><number>[letter]_<similarity>_...``; all others are
    parsed as ``<integer id>_...``.
    """
    # Lower bounds of the color bins, per mutation type.
    colors_full = {
        "SBS": {
            25: '#003d7c',
            10: '#224ba5',
            5: '#4459ce',
            2.5: '#6767f7',
            1: '#9968c8',
            0.5: '#cc6999',
            0.25: '#ff6b6b',
            0.1: '#ff8962',
            0.05: '#ffa759',
            0: '#ffc651'
        },
        "ID": {
            1: '#003d7c',
            0.5: '#224ba5',
            0.1: '#4459ce',
            0.05: '#6767f7',
            0.04: '#9968c8',
            0.03: '#cc6999',
            0.02: '#ff6b6b',
            0.01: '#ff8962',
            0.001: '#ffa759',
            0: '#ffc651'
        },
        "DBS": {
            0.5: '#003d7c',
            0.1: '#224ba5',
            0.05: '#4459ce',
            0.03: '#6767f7',
            0.02: '#9968c8',
            0.01: '#cc6999',
            0.008: '#ff6b6b',
            0.005: '#ff8962',
            0.001: '#ffa759',
            0: '#ffc651'
        }
    }

    colors = colors_full[type_mut]
    result = read_exposures(exposures_path)
    config_params()
    dic_primary_full, _ = return_metadata()

    result = result.fillna(0)
    signatures = result.columns.tolist()

    # Signatures whose name mentions the mutation type matched a known
    # reference signature; the rest are treated as novel.
    similar = [s for s in signatures if type_mut in s]
    notsimilar = [s for s in signatures if type_mut not in s]

    result['TTYPE'] = [dic_primary_full[t] for t in result.index.tolist()]

    dic_sig = defaultdict(lambda: defaultdict(float))
    dic_proportion = defaultdict(lambda: defaultdict(float))
    for ttype, data in result.groupby(by='TTYPE'):
        data2 = data.drop('TTYPE', axis=1)
        for col in data2:
            # Filter once and reuse it for both the median and the count.
            active = data2[data2[col] > min_val]

            # we normalize it by the number of MB in the human genome (3234)
            dic_sig[ttype][col] = active[col].median() / 3234

            # Novel signatures are keyed by their leading id only, because
            # their rows are re-indexed by that id further down.
            key = col if type_mut in col else col.split('_')[0]
            dic_proportion[ttype][key] = len(active) / len(data2)

    medians = pd.DataFrame.from_dict(dic_sig)

    # Group the reference-like signatures by the reference number so they
    # can be listed in numeric order.
    keep_order_similar = defaultdict(list)
    for s in similar:
        number = s.split('_')[1].split(type_mut)[1]
        try:
            keep_n = int(number)
        except ValueError:
            # Reference names such as "17b" carry a trailing letter.
            keep_n = int(number[:-1])

        keep_order_similar[keep_n].append(str(s))

    order_prev_labels = []
    order_prev = []
    for i in sorted(keep_order_similar, reverse=True):
        # Sort signatures sharing a reference number by
        # "<reference>_<similarity>_<id>".
        d_equiv = {}
        for sig in keep_order_similar[i]:
            parts = sig.split('_')
            d_equiv['{}_{}_{}'.format(parts[1], parts[2], parts[0])] = sig
        sorted_sigs_list = [d_equiv[ss] for ss in sorted(d_equiv)]

        unique_match = len(keep_order_similar[i]) == 1
        for sim, sig in reversed(list(enumerate(sorted_sigs_list, start=1))):
            parts = sig.split('_')
            similarity = round(float(parts[2]), 3)
            if unique_match:
                order_prev_labels.append('E-{}{} ({}-like, {})'.format(
                    type_mut, parts[0], parts[1], similarity))
            else:
                order_prev_labels.append('E-{}{} ({}-like {}, {})'.format(
                    type_mut, parts[0], parts[1], sim, similarity))
            order_prev.append(sig)

    no_similar_signatures = medians.loc[notsimilar]
    no_similar_signatures.index = [
        int(l.split('_')[0]) for l in no_similar_signatures.index.tolist()
    ]
    no_similar_signatures = no_similar_signatures.sort_index(ascending=True)

    names_notsimilar = [
        'E-{} {}'.format(type_mut, c)
        for c in no_similar_signatures.index.tolist()[::-1]
    ]

    # Novel signatures first (descending id), then the reference-like ones.
    merged = pd.concat([
        no_similar_signatures.sort_index(ascending=False),
        medians.loc[order_prev],
    ])
    merged_labels = names_notsimilar + order_prev_labels

    config_params(5)

    fig, ax = plt.subplots(1, 1, figsize=figsize)

    thresholds = sorted(colors, reverse=True)
    tumor_types = merged.columns.tolist()
    for yval, (i, row) in enumerate(merged.iterrows()):
        for xval, t in enumerate(tumor_types):
            val = row[t]
            # NaN medians (no sample above min_val) fail this test and are
            # skipped as well.
            if val > 0:
                # Every palette has a 0 bin, so a positive value always
                # finds a color here.
                for number in thresholds:
                    if val > number:
                        color_scatter = colors[number]
                        break
                # Reference-like rows are indexed by their full (string)
                # name and novel rows by an int id; str() yields the
                # dic_proportion key in both cases.
                plt.scatter(xval,
                            yval,
                            c=color_scatter,
                            s=dic_proportion[t][str(i)] * 20)

    ax.set_xticks(np.arange(len(merged.T)))
    ax.set_xticklabels(tumor_types, rotation=90)
    ax.set_yticks(np.arange(len(merged)))

    ax.set_yticklabels(merged_labels)
    ax.xaxis.set_ticks_position('top')
    ax.set_axisbelow(True)

    ax.yaxis.grid(color='gray', linestyle='dashed', alpha=0.3)
    ax.xaxis.grid(color='gray', linestyle='dashed', alpha=0.3)

    for side in ['top', 'bottom', 'left', 'right']:
        ax.spines[side].set_visible(False)

    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')

    plt.ylim(-1, len(merged))
    plt.tight_layout()
    plt.savefig('figures/{}/supp1_{}.svg'.format(type_extraction, type_mut))
    plt.savefig('figures/{}/supp1_{}.png'.format(type_extraction, type_mut),
                dpi=600)

    plt.close()
예제 #17
0
def GBM_tzm_plot(total_MGMT, nottreated, total_treated, samples_mmr, fitted):
    """Box + jittered-dot plot of SBS11 exposure across four sample groups.

    The treated samples are split into three disjoint groups (those in
    ``samples_mmr``; those in ``total_MGMT`` but not in the first group; the
    remainder) and shown next to the untreated samples. Only samples that
    are columns of ``fitted`` are plotted, while the tick labels report the
    size of each full group. The figure is written to
    ``figures/GBM_tmz.svg``.

    Args:
        total_MGMT: Sample ids flagged as MGMT.
        nottreated: Sample ids of untreated samples.
        total_treated: Sample ids of treated samples.
        samples_mmr: Sample ids flagged as MMR.
        fitted: Table indexed by signature name with one column per sample.
    """
    sig_temo = 'SBS11'
    config_params(6.5)

    # MMR group first; MGMT excludes MMR; the rest is neither.
    treated_MMR = [s for s in total_treated if s in samples_mmr]
    treated_MGMT = [
        s for s in total_treated
        if s not in treated_MMR and s in total_MGMT
    ]
    treated_no_alt = [
        s for s in total_treated
        if not (s in treated_MMR or s in treated_MGMT)
    ]

    def exposure_of(samples):
        # Exposure row restricted to the samples present in ``fitted``.
        available = [s for s in samples if s in fitted.columns]
        return fitted[available].loc[sig_temo]

    # (group, half-width of the horizontal jitter), in x-axis order.
    layout = [
        (nottreated, 0.2),
        (treated_no_alt, 0.2),
        (treated_MGMT, 0.2),
        (treated_MMR, 0.1),
    ]
    exposures = [exposure_of(group) for group, _ in layout]

    fig, ax = plt.subplots(1, 1, figsize=(1.25, 1.5))

    sns.boxplot(data=exposures,
                color='#ecececff',
                linewidth=1,
                showfliers=False,
                ax=ax)

    for slot, (values, (_, spread)) in enumerate(zip(exposures, layout)):
        # One random draw per point, groups processed left to right.
        xs = [
            slot + np.random.uniform(-spread, spread, 1)[0] for _ in values
        ]
        ax.scatter(xs,
                   values,
                   s=15,
                   color='#800080ff',
                   edgecolor='black',
                   lw=0.5)

    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    tick_labels = [
        'TMZ untreated (n ={})'.format(len(nottreated)),
        'MGMT-notmet MMR-prof (n = {})'.format(len(treated_no_alt)),
        'MGMT-met MMR-prof (n = {})'.format(len(treated_MGMT)),
        'MMR-def(n = {})'.format(len(treated_MMR)),
    ]
    plt.xticks([0, 1, 2, 3], tick_labels, rotation=90)

    plt.title('GBM-cohort\n(Wang et al, 2016)')
    plt.ylabel('TMZ related SBS')
    plt.savefig('figures/GBM_tmz.svg')
    plt.close()
예제 #18
0
def plot_dbs(sig, title, outpath, ttype):
    """Plot one doublet-base-substitution profile and save it as PNG and SVG.

    The figure has a thin color strip on top (one segment per reference
    doublet) and a bar chart of ``sig`` below, with bars colored by the
    reference doublet of each channel.

    Args:
        sig: Sequence of bar heights, one per channel, in the order returned
            by ``order_to_plot_dbs()``.
        title: Last component of the output file name.
        outpath: Root output directory.
        ttype: Used both as sub-directory and as file-name prefix:
            ``{outpath}/processes/{ttype}/{ttype}.{title}.(png|svg)``.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})

    # Reference doublet -> color. Insertion order is also the left-to-right
    # order of the segments in the top strip.
    dbs_color = {
        'AC': '#a6cee3',
        'AT': '#1f78b4',
        'CC': '#b2df8a',
        'CG': '#33a02c',
        'CT': '#fb9a99',
        'GC': '#e3211d',
        'TA': '#fdbf6f',
        'TC': '#ff7f00',
        'TG': '#cab2d6',
        'TT': '#6a3d9a',
    }

    order_dbs_list = order_to_plot_dbs()

    vals = sig

    # Channel names start with the reference doublet followed by "_".
    colors = [dbs_color[db.split('_')[0]] for db in order_dbs_list]
    counter_colors = Counter(colors)

    # Top strip: consecutive segments as wide as the number of channels of
    # each reference doublet, starting half a bar to the left of bar 0.
    bot = -0.5
    for c in dbs_color.values():
        axs[0].barh(
            1,
            counter_colors[c],
            left=bot,
            color=c,
            align='center',
        )
        bot += counter_colors[c]

    for side in ['top', 'bottom', 'left', 'right']:
        axs[0].spines[side].set_visible(False)

    axs[0].get_yaxis().set_visible(False)
    axs[0].get_xaxis().set_visible(False)

    axs[0].set_xlim(-1, 78)

    x = list(range(len(vals)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        axs[1].axhline(y=level,
                       xmin=-1,
                       xmax=96,
                       lw=0.3,
                       color='grey',
                       alpha=0.2)

    axs[1].bar(x,
               vals,
               color=colors,
               width=0.8,
               linewidth=0,
               align='center',
               alpha=1)
    axs[1].set_xticks(x)

    # Tick labels are the last two characters of each channel name.
    axs[1].set_xticklabels(
        ['{}{}'.format(
            a[-2],
            a[-1],
        ) for a in order_dbs_list],
        rotation=90,
        fontsize=2,
        verticalalignment="center",
        ha='center',
        color='grey')

    axs[1].set_xlim(-1, 78)

    axs[1].spines['top'].set_visible(False)
    axs[1].set_ylabel('Relative Probability')

    axs[1].spines['right'].set_visible(False)

    plt.setp([axs[1].get_xticklines(), axs[1].get_yticklines()], color='grey')

    axs[1].xaxis.set_ticks_position('none')
    for axis in ['top', 'bottom', 'left', 'right']:
        axs[1].spines[axis].set_linewidth(0.2)

    axs[1].xaxis.set_tick_params(pad=0.5)
    axs[1].yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)
    plt.tight_layout()
    plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                   title),
                dpi=300,
                bbox_inches='tight')
    plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                   title))

    plt.close()
예제 #19
0
def plot_indel(sig, title, outpath, ttype):
    """Plot one indel profile and save it as PNG and SVG.

    The figure has a thin color strip on top (one segment per indel class)
    and a bar chart of ``sig`` below, with bars colored by class.

    Args:
        sig: Sequence of bar heights, one per channel; the color table below
            provides 83 colors.
        title: Last component of the output file name.
        outpath: Root output directory.
        ttype: Used both as sub-directory and as file-name prefix:
            ``{outpath}/processes/{ttype}/{ttype}.{title}.(png|svg)``.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})

    vals = sig

    # (color, number of consecutive channels) for every class, in plotting
    # order. Both the per-bar colors and the top strip derive from this.
    color_runs = [
        ('#fdbe6f', 6), ('#ff8002', 6), ('#b0dd8b', 6), ('#36a12e', 6),
        ('#fdcab5', 6), ('#fc8a6a', 6), ('#f14432', 6), ('#bc191a', 6),
        ('#d0e1f2', 6), ('#94c4df', 6), ('#4a98c9', 6), ('#1764ab', 6),
        ('#e1e1ef', 1), ('#b6b6d8', 2), ('#8683bd', 3), ('#62409b', 5),
    ]
    colors = [c for c, run in color_runs for _ in range(run)]

    # Top strip: one segment per class, starting half a bar left of bar 0.
    bot = -0.5
    for c, run in color_runs:
        axs[0].barh(1, run, left=bot, color=c)
        bot += run

    for side in ['top', 'bottom', 'left', 'right']:
        axs[0].spines[side].set_visible(False)

    axs[0].get_yaxis().set_visible(False)
    axs[0].get_xaxis().set_visible(False)

    axs[0].set_xlim(-1, 83)

    x = list(range(len(vals)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        axs[1].axhline(y=level,
                       xmin=-1,
                       xmax=83,
                       lw=0.3,
                       color='grey',
                       alpha=0.3)
    axs[1].bar(x,
               vals,
               color=colors,
               width=0.7,
               linewidth=0,
               align='center',
               alpha=1)
    axs[1].set_xticks(x)
    # Tick labels are the last "_"-separated field of each channel name.
    axs[1].set_xticklabels([i.split('_')[-1] for i in order_to_plot_indel()],
                           fontsize=2,
                           verticalalignment="center",
                           ha='center',
                           color='grey')
    axs[1].set_xlim(-1, 83)

    plt.tight_layout()

    axs[1].spines['top'].set_visible(False)
    axs[1].set_ylabel('Relative Probability')

    axs[1].spines['right'].set_visible(False)

    plt.setp([axs[1].get_xticklines(), axs[1].get_yticklines()], color='grey')

    axs[1].xaxis.set_ticks_position('none')
    for axis in ['top', 'bottom', 'left', 'right']:
        axs[1].spines[axis].set_linewidth(0.2)

    axs[1].xaxis.set_tick_params(pad=0.5)
    axs[1].yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)
    plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                   title),
                dpi=300,
                bbox_inches='tight')
    plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                   title))

    plt.close()
예제 #20
0
def plot_bias_dbs(d_clustered, d_unclustered, outpath, ttype, label1, label2):
    """Compare two conditions of doublet-substitution signatures.

    For every column ``s`` of ``d_clustered`` (the same column is read from
    ``d_unclustered``) this writes:

    * ``{outpath}/processes/{ttype}/{ttype}.{s}.(png|svg)``: a four-panel
      figure with (0) the interleaved values of both conditions, (1) the
      per-channel difference condition 1 - condition 2, (2) the per-channel
      sum after normalizing the interleaved vector to 1, and (3) the totals
      per reference doublet for each condition, starred when
      ``poisson_exact`` returns a value below 0.05;
    * ``...{s}.max_components.tsv``: reference doublet with the largest
      combined total and its two per-condition totals;
    * ``...{s}.diagonal.(png|svg)``: scatter of condition 1 against
      condition 2 per channel.

    Args:
        d_clustered: Table with one column per signature (condition 1).
        d_unclustered: Table with the same columns (condition 2).
        outpath: Root output directory.
        ttype: Sub-directory and file-name prefix.
        label1: Display name of condition 1.
        label2: Display name of condition 2.

    NOTE(review): the per-doublet totals walk ``dbs_color`` in insertion
    order and assume ``order_to_plot_dbs()`` lists its channels grouped in
    that same order -- confirm against the helper.
    """
    config_params(3)

    final_order = []
    order_plot = order_to_plot_dbs()

    for o in order_plot:
        final_order.append('{}_{}'.format(o, label1))
        final_order.append('{}_{}'.format(o, label2))

    # Reference doublet -> color.
    dbs_color = {
        'AC': '#a6cee3',
        'AT': '#1f78b4',
        'CC': '#b2df8a',
        'CG': '#33a02c',
        'CT': '#fb9a99',
        'GC': '#e3211d',
        'TA': '#fdbf6f',
        'TC': '#ff7f00',
        'TG': '#cab2d6',
        'TT': '#6a3d9a',
    }

    for s in d_clustered.columns:

        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Two bars (one per condition) for every channel in panel 0.
        colors = []
        for db in order_plot:
            colors.append(dbs_color[db.split('_')[0]])
            colors.append(dbs_color[db.split('_')[0]])

        x = list(range(len(colors)))

        # Interleave the conditions: [c1_ch0, c2_ch0, c1_ch1, c2_ch1, ...].
        sig = []
        for lag, lead in zip(d_clustered[s], d_unclustered[s]):
            sig.append(lag)
            sig.append(lead)

        # Totals per reference doublet and condition, plus significance.
        toplot_full = []
        colors_joinbar = []
        marks = []

        start = 0
        for k, color_db in dbs_color.items():
            len_ks = [db for db in order_plot if db.split('_')[0] == k]
            amount = len(len_ks) * 2
            c = sig[start:start + amount]
            start = start + amount
            c_condition1 = np.sum(c[::2])
            c_condition2 = np.sum(c[1::2])
            colors_joinbar.append(color_db)
            colors_joinbar.append(color_db)
            toplot_full.append(c_condition1)
            toplot_full.append(c_condition2)

            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.append(mark)
            marks.append(mark)

        xjoin = list(range(len(toplot_full)))
        axs[3].bar(xjoin,
                   toplot_full,
                   color=colors_joinbar,
                   width=0.5,
                   linewidth=0)
        axs[3].set_xticks(xjoin)
        for ix, (mark, val) in enumerate(zip(marks, toplot_full)):
            axs[3].text(xjoin[ix], val + 10, mark)

        # Record the reference doublet with the largest combined total.
        start = 0
        bias = ''
        labels = list(dbs_color.keys())
        for indx, pairs in enumerate(chunks(toplot_full, 2)):
            first, second = pairs[0], pairs[1]
            if (first + second) > start:
                start = first + second
                bias = '{}\t{}\t{}\n'.format(labels[indx], first, second)
        with open(
                '{}/processes/{}/{}.{}.max_components.tsv'.format(
                    outpath, ttype, ttype, s), 'wt') as outfile:
            outfile.write(bias)

        axs[0].bar(x, sig, color=colors, width=0.5, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)

        axs[0].set_xlim(-1, 156)

        # Panel 1: per-channel difference between the two conditions.
        colors = []
        vector = []
        for lag, lead in zip(sig[0::2], sig[1::2]):
            vector.append(lag - lead)

        for db in order_plot:
            colors.append(dbs_color[db.split('_')[0]])

        x = list(range(len(colors)))

        axs[1].bar(x, vector, color=colors, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 78)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: per-channel sum of both conditions after normalizing the
        # interleaved vector to 1. The list is converted explicitly instead
        # of relying on list / numpy-scalar coercion.
        vector = []
        colors = []
        sig = np.asarray(sig) / np.sum(sig)
        for lag, lead in zip(sig[0::2], sig[1::2]):
            vector.append(lag + lead)

        for db in order_plot:
            colors.append(dbs_color[db.split('_')[0]])

        x = list(range(len(colors)))
        axs[2].bar(x, vector, color=colors, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 78)

        axs[2].spines['top'].set_visible(False)

        axs[0].spines['top'].set_visible(False)
        axs[0].set_ylabel('NMF counts')
        axs[1].set_ylabel('{} - {}'.format(label1, label2))

        axs[2].set_ylabel('Relative Probability')

        # Grey tick lines on every panel. The original paired panel 3's x
        # tick lines with panel 2's y tick lines, leaving panel 3's y tick
        # lines unstyled.
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')

        for panel in axs:
            panel.xaxis.set_ticks_position('none')

        for axis in ['top', 'bottom', 'left', 'right']:
            for panel in axs:
                panel.spines[axis].set_linewidth(0.2)
        for panel in axs:
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()

        plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                       s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                       s))

        plt.close()

        # Diagonal plot: condition 1 against condition 2, one dot per
        # channel, colored with the per-channel colors built for panel 2.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        total_max = np.max([np.max(d_clustered[s]), np.max(d_unclustered[s])])

        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.scatter(d_clustered[s], d_unclustered[s], s=5, c=colors)
        plt.xlim(0, np.max(d_clustered[s]) + 10)
        plt.ylim(0, np.max(d_unclustered[s]) + 10)

        plt.xlabel(label1)
        plt.ylabel(label2)
        plt.tight_layout()

        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, s))

        plt.close()
예제 #21
0
def plot_snvs(sig, title, outpath, ttype):
    """Plot one SNV profile and save it as PNG and SVG.

    ``sig`` is consumed in blocks of 16 values via ``chunks``; each block
    gets its own color, both in the thin strip on top and in the bar chart
    below. Output goes to
    ``{outpath}/processes/{ttype}/{ttype}.{title}.(png|svg)``.

    Args:
        sig: Sequence of bar heights; the palette provides six block colors.
        title: Last component of the output file name.
        outpath: Root output directory.
        ttype: Sub-directory and file-name prefix.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})
    strip, bars = axs[0], axs[1]
    order_plot = order_muts("snv")

    palette = [
        '#1ebff0', '#050708', '#e62725', '#cbcacb', '#a1cf64', '#edc8c5'
    ]

    # Walk the profile block by block, collecting heights and bar colors and
    # drawing the matching 16-wide segment of the top strip.
    heights = []
    bar_colors = []
    left_edge = -0.5
    for block_ix, block in enumerate(chunks(sig, 16)):
        shade = palette[block_ix]
        bar_colors.extend([shade for _ in block])
        strip.barh(1, 16, left=left_edge, color=shade)
        left_edge += 16
        heights.extend(block)

    strip.set_xlim(-1, 96)
    for side in ('top', 'bottom', 'left', 'right'):
        strip.spines[side].set_visible(False)

    strip.get_yaxis().set_visible(False)
    strip.get_xaxis().set_visible(False)

    positions = list(range(len(heights)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        bars.axhline(y=level,
                     xmin=-1,
                     xmax=96,
                     lw=0.6,
                     color='grey',
                     alpha=0.2)

    bars.bar(positions,
             heights,
             color=bar_colors,
             width=0.8,
             linewidth=0,
             align='center')
    bars.set_xticks(positions)

    # Tick label: first, third and last character of each channel name.
    tick_text = ['{}{}{}'.format(a[0], a[2], a[-1]) for a in order_plot]
    bars.set_xticklabels(tick_text,
                         verticalalignment="center",
                         ha='center',
                         rotation=90,
                         fontsize=2,
                         color='grey')

    # Layout is computed before the remaining cosmetic tweaks, as before.
    plt.tight_layout()
    plt.xlim(-1, 96)

    bars.spines['top'].set_visible(False)
    bars.set_ylabel('Relative Probability')

    bars.spines['right'].set_visible(False)

    plt.setp([bars.get_xticklines(), bars.get_yticklines()], color='grey')

    bars.xaxis.set_ticks_position('none')
    for side in ('top', 'bottom', 'left', 'right'):
        bars.spines[side].set_linewidth(0.2)

    bars.xaxis.set_tick_params(pad=0.5)
    bars.yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)

    png_path = '{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                  title)
    svg_path = '{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                  title)
    plt.savefig(png_path, dpi=300, bbox_inches='tight')
    plt.savefig(svg_path)

    plt.close()
예제 #22
0
def plot_bias_snv(d_clustered, d_unclustered, outpath, ttype, label1, label2):
    """Compare two conditions of SNV signatures.

    For every column ``s`` of ``d_clustered`` (the same column is read from
    ``d_unclustered``) this writes:

    * ``{outpath}/processes/{ttype}/{ttype}.{s}.(png|svg)``: a four-panel
      figure with (0) the interleaved values of both conditions, (1) the
      per-channel difference condition 1 - condition 2, (2) the per-channel
      sum after normalizing the interleaved vector to 1, and (3) the totals
      per block of 16 channels for each condition, starred when
      ``poisson_exact`` returns a value below 0.05;
    * ``...{s}.max_components.tsv``: substitution class with the largest
      combined total and its two per-condition totals;
    * ``...{s}.diagonal.(png|svg)``: scatter of condition 1 against
      condition 2 per channel.

    Args:
        d_clustered: Table with one column per signature (condition 1).
        d_unclustered: Table with the same columns (condition 2).
        outpath: Root output directory.
        ttype: Sub-directory and file-name prefix.
        label1: Display name of condition 1.
        label2: Display name of condition 2.
    """
    config_params(3)

    final_order = []
    order_plot = order_to_plot_snvs()

    for o in order_plot:
        final_order.append('{}_{}'.format(o, label1))
        final_order.append('{}_{}'.format(o, label2))

    # One color and one label per block of 16 channels.
    colors_mut = [
        '#1ebff0', '#050708', '#e62725', '#cbcacb', '#a1cf64', '#edc8c5'
    ]
    labels = ['C>A', 'C>G', 'C>T', 'T>A', 'T>C', 'T>G']

    for s in d_clustered.columns:

        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Panel 0 holds two bars per channel, hence 32 consecutive entries
        # of the same color for every block of 16 channels.
        colors = []
        for ix, c in enumerate(chunks(d_clustered[s], 16)):
            colors.extend([colors_mut[ix] for _ in c])
            colors.extend([colors_mut[ix] for _ in c])

        # Interleave the conditions: [c1_ch0, c2_ch0, c1_ch1, c2_ch1, ...].
        sig = []
        for lag, lead in zip(d_clustered[s], d_unclustered[s]):
            sig.append(lag)
            sig.append(lead)

        # Totals per block and condition, plus significance.
        toplot_full = []
        colors_joinbar = []
        marks = []
        for ix, c in enumerate(chunks(sig, 32)):

            c_condition1 = np.sum(c[::2])
            c_condition2 = np.sum(c[1::2])
            colors_joinbar.append(colors_mut[ix])
            colors_joinbar.append(colors_mut[ix])
            toplot_full.append(c_condition1)
            toplot_full.append(c_condition2)

            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.append(mark)
            marks.append(mark)

        xjoin = list(range(len(toplot_full)))

        axs[3].bar(xjoin,
                   toplot_full,
                   color=colors_joinbar,
                   width=0.5,
                   linewidth=0)

        # Record the substitution class with the largest combined total.
        start = 0
        bias = ''
        for indx, pairs in enumerate(chunks(toplot_full, 2)):
            first, second = pairs[0], pairs[1]
            if (first + second) > start:
                start = first + second
                bias = '{}\t{}\t{}\n'.format(labels[indx], first, second)
        with open(
                '{}/processes/{}/{}.{}.max_components.tsv'.format(
                    outpath, ttype, ttype, s), 'wt') as outfile:
            outfile.write(bias)

        axs[3].set_xticks(xjoin)
        for ix, (mark, val) in enumerate(zip(marks, toplot_full)):
            axs[3].text(xjoin[ix], val + 10, mark)

        x = list(range(len(colors)))

        axs[0].bar(x, sig, color=colors, width=0.5, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)

        axs[0].set_xlim(-1, 192)

        # Panel 1: per-channel difference between the two conditions.
        colors = []
        vector = []
        for lag, lead in zip(sig[0::2], sig[1::2]):
            vector.append(lag - lead)

        for ix, c in enumerate(chunks(vector, 16)):
            colors.extend([colors_mut[ix] for _ in c])

        x = list(range(len(colors)))

        axs[1].bar(x, vector, color=colors, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 96)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: per-channel sum of both conditions after normalizing the
        # interleaved vector to 1. The list is converted explicitly instead
        # of relying on list / numpy-scalar coercion.
        vector = []
        colors = []

        sig = np.asarray(sig) / np.sum(sig)
        for lag, lead in zip(sig[0::2], sig[1::2]):
            vector.append(lag + lead)

        for ix, c in enumerate(chunks(vector, 16)):
            colors.extend([colors_mut[ix] for _ in c])

        x = list(range(len(colors)))
        axs[2].bar(x, vector, color=colors, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 96)
        axs[2].spines['top'].set_visible(False)

        axs[0].spines['top'].set_visible(False)

        # Grey tick lines on every panel. The original paired panel 3's x
        # tick lines with panel 2's y tick lines, leaving panel 3's y tick
        # lines unstyled.
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')

        for panel in axs:
            panel.xaxis.set_ticks_position('none')

        for axis in ['top', 'bottom', 'left', 'right']:
            for panel in axs:
                panel.spines[axis].set_linewidth(0.2)
        for panel in axs:
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()

        plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                       s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                       s))

        plt.close()

        # Diagonal plot: condition 1 against condition 2, one dot per
        # channel, colored with the per-channel colors built for panel 2.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        total_max = np.max([np.max(d_clustered[s]), np.max(d_unclustered[s])])

        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.scatter(d_clustered[s], d_unclustered[s], s=5, c=colors)
        plt.xlim(0, np.max(d_clustered[s]) + 1000)
        plt.ylim(0, np.max(d_unclustered[s]) + 1000)

        plt.xlabel(label1)
        plt.ylabel(label2)

        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, s),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, s))

        plt.close()