Exemplo n.º 1
0
def add_sample_legend(ax):
    """Add two gender-specific sample legends to *ax*.

    The handles/labels already registered on ``ax`` are looked up in the
    module-level ``xlsx_tab`` table (labels are matched against its
    ``SAMPLE`` column) and split by ``GENDER``.  Male handles go into a
    legend titled ``'M'`` in the upper-left corner, female handles into a
    legend titled ``'F'`` in the upper-right corner.  Entries are ordered
    as the ``CODE`` values appear in ``xlsx_tab`` and are labelled with a
    running two-digit number (``'01'``, ``'02'``, ...).

    Parameters
    ----------
    ax : matplotlib axes
        Axes whose existing legend handles are redistributed.

    Returns
    -------
    None
    """
    handles, labels = ax.get_legend_handles_labels()
    handle_arr = np.array(handles)

    # One row per label, in label order; the 'index' column created by the
    # second reset_index() is the position of that label's handle.
    code_to_pos = (
        xlsx_tab.set_index('SAMPLE').loc[labels]
        .reset_index().reset_index().set_index('CODE')
    )

    male_codes = xlsx_tab.query('GENDER=="male"')['CODE']
    female_codes = xlsx_tab.query('GENDER=="female"')['CODE']

    # Male codes without a plotted handle are dropped (reindex + dropna);
    # female codes are looked up strictly with .loc.
    h1s = handle_arr[
        code_to_pos.reindex(index=male_codes)['index']
        .dropna().astype(int).values
    ]
    h2s = handle_arr[code_to_pos.loc[female_codes, 'index'].values]

    def _numbered(count):
        # '01', '02', ... -- a real list rather than a lazy map object.
        return ['{:02d}'.format(i) for i in range(1, count + 1)]

    def _handles_of(legend):
        # Matplotlib 3.7 renamed ``legendHandles`` to ``legend_handles`` and
        # 3.9 removed the old spelling; support both.
        entries = getattr(legend, 'legend_handles', None)
        if entries is None:
            entries = legend.legendHandles
        return entries

    def _restyle(legend):
        legend.get_title().set_fontsize(smallsize)
        for h in _handles_of(legend):
            h.set_alpha(0.9)
            h.set_sizes([50])

    shared_kws = dict(
        prop={'size': smallsize},
        handletextpad=-0.5,
        columnspacing=-0.5,
        labelspacing=0,
        edgecolor='k',
        ncol=3,
        markerfirst=False,
    )

    lg1 = ax.legend(
        h1s,
        _numbered(h1s.shape[0]),
        loc='upper left',
        title='M',
        **shared_kws
    )
    _restyle(lg1)

    # A second ax.legend() call replaces the current legend, so the first
    # one has to be re-attached as a plain artist.
    ax.add_artist(lg1)

    lg2 = ax.legend(
        h2s,
        _numbered(h2s.shape[0]),
        loc='upper right',
        title='F',
        borderpad=0.2,
        **shared_kws
    )
    _restyle(lg2)
Exemplo n.º 2
0
def add_clone_legend(ax, FSAMPLE=True):
    """Replace the legend of *ax* with one entry per distinct sample label.

    Duplicate labels are collapsed (first occurrence wins), the remaining
    labels are joined with the module-level ``xlsx_tab`` table on its
    ``SAMPLE`` column and sorted by ``CODE``.  The legend text is the
    ``CODE`` with every ``'M'`` and ``'F'`` removed.

    Parameters
    ----------
    ax : matplotlib axes
        Axes whose registered handles/labels feed the legend.
    FSAMPLE : bool, default True
        When false, every table value equal to ``'M11'`` is replaced by
        ``'M10'`` before the legend texts are derived.

    Returns
    -------
    matplotlib.legend.Legend
        The legend that was created.
    """
    handles, labels = ax.get_legend_handles_labels()

    # 'index' (from reset_index) remembers where each unique label sits in
    # ``labels`` so the matching handle can be picked afterwards.
    legend_df1 = (
        pd.Series(labels)
        .drop_duplicates()
        .reset_index()
        .set_index(0)
        .merge(
            xlsx_tab.set_index('SAMPLE'),
            left_index=True,
            right_index=True,
        )
        .sort_values('CODE')
    )

    if not FSAMPLE:
        legend_df1 = legend_df1.replace('M11', 'M10')

    picked_handles = np.array(handles).flatten()[legend_df1['index'].values]
    texts = legend_df1['CODE'].str.replace('M', '').str.replace('F', '').values

    lg = ax.legend(
        picked_handles,
        texts,
        loc='upper right',
        prop={'size': smallsize},
        handletextpad=0.5,
        columnspacing=-0.5,
        labelspacing=0,
        edgecolor='k',
        ncol=2,
        markerfirst=False,
        borderpad=0.2,
    )

    # Matplotlib 3.7 renamed ``legendHandles`` to ``legend_handles`` and 3.9
    # removed the old spelling; support both.
    entries = getattr(lg, 'legend_handles', None)
    if entries is None:
        entries = lg.legendHandles

    for h in entries:
        h.set_width(10)

    return lg
Exemplo n.º 3
0
def female_genome_plot(ax1, ax2):
    """Plot genome-wide gain/loss frequencies of big CNVs in female samples.

    The same two curves are drawn on both axes, which show different
    y-ranges of one broken y-axis: *ax1* covers -0.4..0.4 and *ax2*
    covers -4.5..-1.5.  Gains are plotted as positive percentages and
    losses as negative percentages; the percentage is
    ``100 * (number of overlapping CNVs per bin) / n`` where ``n`` is the
    summed ``n_pf`` of the female samples.

    Reads the module-level ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``female_fig_df`` and
    ``chr_pos`` objects and calls ``tmpCleanUp()`` after the interval
    intersections.

    Parameters
    ----------
    ax1, ax2 : matplotlib axes
        Upper and lower panel of the broken-axis plot.

    Returns
    -------
    None
    """
    # Denominator of the percentages: total n_pf over the female samples.
    n = pd.concat(
        [cells_tab, xlsx_tab.set_index('SAMPLE')],
        axis=1, sort=False,
    ).query('GENDER=="female"')['n_pf'].sum()

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="female"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="female"')

    bin_bed  = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Intersect with ``c=True``; the resulting 'score' column is re-indexed
    # by the bin labels of ``good_df`` and used as the per-bin count.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']

    tmpCleanUp()

    bins = female_fig_df.columns
    x = np.arange(bins.shape[0])
    gain_pct = 100*gain_s[bins].fillna(0)/n
    loss_pct = -100*loss_s[bins].fillna(0)/n

    for ax in (ax1, ax2):
        ax.plot(x, gain_pct, '-', lw=2, color='darkorange', label='Gain')
    for ax in (ax1, ax2):
        ax.plot(x, loss_pct, '-', lw=2, color='deepskyblue', label='Loss')

    # Chromosome boundaries as thin vertical lines behind the curves.
    for pos in chr_pos:
        for ax in (ax1, ax2):
            ax.plot(np.tile(pos,10), np.linspace(-5, 9, 10), '-', color='black', alpha=0.5, lw=1, zorder=0)

    ax1.set_xticks([])
    ax2.set_xticks([])
    ax1.tick_params(axis='x', bottom=False, length=0)
    ax2.tick_params(axis='x', bottom=False, length=0)
    ax1.tick_params(axis='y', length=5)
    ax2.tick_params(axis='y', length=5)

    ax1.set_ylim(-0.4, 0.4)
    ax2.set_ylim(-4.5, -1.5)

    # Hide the facing spines so the two panels read as one broken axis.
    ax1.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)

    ax1.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Tick labels show magnitudes: the negative (loss) side is labelled
    # without a minus sign.
    ax1.set_yticks([0.3,0,-0.3])
    ax1.set_yticklabels(['0.3','0.0','0.3'], fontsize=smallsize)

    ax2.set_yticks([-3])
    ax2.set_yticklabels(['3'], fontsize=smallsize)

    return
Exemplo n.º 4
0
def male_genome_plot(ax, FSAMPLE=True):
    """Plot genome-wide gain/loss frequencies of big CNVs in male samples.

    Gains are drawn as positive and losses as negative percentages,
    ``100 * (number of overlapping CNVs per bin) / n`` with ``n`` the
    summed ``n_pf`` of the male samples.  Chromosome names are written as
    rotated tick labels along the top of the axes.

    Reads the module-level ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``male_fig_df`` and ``chr_pos``
    objects and calls ``tmpCleanUp()`` after the interval intersections.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    FSAMPLE : bool, default True
        When false, the sample labelled ``'$FSAMPLE'`` is excluded from
        both the CNV tables and the ``n_pf`` total.

    Returns
    -------
    None
    """
    male_samples = pd.concat(
        [cells_tab, xlsx_tab.set_index('SAMPLE')],
        axis=1, sort=False,
    ).query('GENDER=="male"')

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="male"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="male"')

    if not FSAMPLE:
        big_cnv_gain = big_cnv_gain.query('Sample!="$FSAMPLE"')
        big_cnv_loss = big_cnv_loss.query('Sample!="$FSAMPLE"')
        n = male_samples.drop('$FSAMPLE')['n_pf'].sum()
    else:
        n = male_samples['n_pf'].sum()

    bin_bed  = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Intersect with ``c=True``; the resulting 'score' column is re-indexed
    # by the bin labels of ``good_df`` and used as the per-bin count.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']

    tmpCleanUp()

    bins = male_fig_df.columns
    x = np.arange(bins.shape[0])
    ax.plot(x,  100*gain_s[bins].fillna(0)/n, '-', lw=2, color='darkorange',  label='Gain', zorder=1)
    ax.plot(x, -100*loss_s[bins].fillna(0)/n, '-', lw=2, color='deepskyblue', label='Loss', zorder=1)

    # Chromosome boundaries as thin vertical lines behind the curves.
    for pos in chr_pos:
        ax.plot(np.tile(pos,10), np.linspace(-3, 9, 10), '-', color='black', alpha=0.5, lw=1, zorder=0)

    ax.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Ticks sit at the midpoint between consecutive boundaries.
    ax.set_xticks( pd.Series(chr_pos).rolling(2).mean()[1:] )
    # Some labels are padded with leading spaces ('19', '21', 'X').
    chrom_names = (
        ref['space'].drop_duplicates()[:-1]
        .str.replace('chr','')
        .replace('19','    19')
        .replace('21','    21')
        .replace('X','    X')
    )
    ax.set_xticklabels(
        chrom_names,
        fontsize=smallsize-4,
        ha='center',
        rotation=90,
    )
    ax.xaxis.set_ticks_position('top')
    ax.set_ylim(-0.4,0.4)
    # Tick labels show magnitudes: the loss side carries no minus sign.
    ax.set_yticks( [-0.3, 0, 0.3] )
    ax.set_yticklabels( [0.3, 0 ,0.3], fontsize=smallsize )

    ax.tick_params(axis='x', bottom=False, length=0, pad=5)
    ax.tick_params(axis='y', length=5)

    return
Exemplo n.º 5
0
# +
# Heatmap matrices are loaded from CSV; the first CSV column becomes the index.
male_heatmap   = pd.read_csv('male_heatmap.csv',   index_col=0)
female_heatmap = pd.read_csv('female_heatmap.csv', index_col=0)

# Five-stop colormap named 'forCNV'; 'black' is the middle stop.
cmap_CNV = LinearSegmentedColormap.from_list('forCNV', ['blueviolet', 'deepskyblue', 'black', 'darkorange', 'orangered'])

# Big-CNV rows sorted by decreasing chrom_fraction; the fraction is then
# coarsened to round(chrom_fraction / 10) so the sort below compares it
# in steps of ten.
tmp_df = sum_df.loc[big_cnv_idx].sort_values('chrom_fraction', ascending=False)
tmp_df['chrom_fraction'] = (tmp_df['chrom_fraction']/10).round()
# Cell order used for the heatmap: take .first() per Cell group (groups kept
# in order of appearance, sort=False), translate Sample ids to their CODE,
# sort by the keys listed below and keep only the resulting Cell index.
# NOTE(review): .loc[tmp_df['Cell']] selects by label, so a Cell that occurs
# several times is selected once per occurrence -- confirm this is intended.
heatmap_order = (
    tmp_df.set_index('Cell')
    .loc[tmp_df['Cell']]
    .reset_index()
    .groupby('Cell',sort=False).first()
    .replace({'Sample':xlsx_tab.set_index('SAMPLE')['CODE'].to_dict()})
#     Alternative ordering without the 'Sample' key, kept for reference:
#     .sort_values(['chr_id','chrom_fraction','cnvTag','binSize','Cell'], ascending=[True, False, True, False, True])
    .sort_values(
        ['chr_id','chrom_fraction','cnvTag','Sample','binSize','Cell'], 
        ascending=[True, False, True, True, False, True]
    )
    .index
)

# # Sort fraction
# tmp_df = cell_chr_cnv_size.sort_values('chrom_fraction', ascending=False)
# # Sort chroms.
# heatmap_order = (
#     tmp_df.reset_index().set_index('Cell')
#     .loc[tmp_df['Cell']]
#     .reset_index()
Exemplo n.º 6
0
matrix_2d = pd.read_pickle('matrix_2d.pkl')

# +
# Per (Cell, Chromosome, cnvTag) summary of the big CNVs: first Gender and
# Sample, largest chrom_fraction.  The result is indexed by Cell only.
cell_info = (
    sum_df.loc[big_cnv_idx]
    .groupby(['Cell', 'Chromosome', 'cnvTag'])
    .agg({'Gender': 'first', 'Sample': 'first', 'chrom_fraction': 'max'})
    .reset_index()
    .set_index('Cell')
)

# Age is the Sample id translated through the SAMPLE -> AGE mapping.
cell_info['Age'] = cell_info['Sample'].replace(
    xlsx_tab.set_index('SAMPLE')['AGE'].to_dict()
)

# Left join on the index: every matrix_2d row is kept.
matrix_2d = matrix_2d.merge(
    cell_info,
    how='left',
    left_index=True,
    right_index=True,
)

# 'Whole' holds the strings 'False'/'True'; it stays 'False' only where
# chrom_fraction < 80 evaluates to True.
matrix_2d['Whole'] = 'False'
matrix_2d['Whole'] = matrix_2d['Whole'].where(
    matrix_2d['chrom_fraction'] < 80, other='True'
)

# +
two_colors = sns.xkcd_palette(['bright blue', 'red'])
Exemplo n.º 7
0
def _find_clones(edis_df):
    """Return one row per clone: connected components of the ``d<10`` graph.

    For every sample (column ``s``) the pairs with ``d<10`` form an
    undirected graph between cells ``a`` and ``b``; each connected
    component becomes a row with columns ``Sample``, ``Clone_size`` and
    ``Cells``.
    """
    rows = []
    for sample, pairs in edis_df.query('d<10').groupby('s'):
        graph = nx.Graph()
        graph.add_nodes_from(pairs['a'].unique().tolist())
        # NOTE(review): every edge receives the whole ``d`` array as its
        # ``length`` attribute; the attribute is not read in this function.
        graph.add_edges_from(pairs[['a', 'b']].values,
                             length=pairs['d'].values)
        for members in nx.connected_components(graph):
            rows.append({
                'Sample': sample,
                'Clone_size': len(members),
                'Cells': list(members),
            })
    # Built in one go: DataFrame.append was removed in pandas 2.0 and
    # re-copies the frame on every call.
    return pd.DataFrame(rows, columns=['Sample', 'Clone_size', 'Cells'])


def _stack_clone_bars(clones, ax_small, ax_large):
    """Draw one unit-high bar per clone (size > 2), stacked per clone size."""
    sample_order = xlsx_tab['SAMPLE']
    for size, group in clones.query('Clone_size>2').groupby('Clone_size'):
        # Order the clones like the samples in xlsx_tab.  Only labels that
        # occur in this group are requested: .loc with missing labels
        # raises KeyError on pandas >= 1.0.
        present = sample_order[sample_order.isin(group['Sample'])]
        group = (group.set_index('Sample')
                 .loc[present.tolist()]
                 .dropna()
                 .reset_index())
        for level, sample in enumerate(group['Sample']):
            ax_small.bar(size,
                         1,
                         bottom=level,
                         width=0.6,
                         color=color_dict[sample],
                         edgecolor='k')
            ax_large.bar(size,
                         1,
                         bottom=level,
                         width=0.6 * 6 * 5 / 4,
                         color=color_dict[sample],
                         edgecolor='k',
                         label=sample)


def _draw_break_marks(ax_left, ax_right, d=0.015):
    """Draw the short diagonal marks at the x-axis break between two panels.

    ``d`` is the size of the marks in axes coordinates.
    """
    style = dict(color='k', clip_on=False)
    ax_left.plot((1 - d, 1 + d), (1 - d, 1 + d),
                 transform=ax_left.transAxes, **style)
    ax_left.plot((-d * 3 / 2, +d * 3 / 2), (1 - d, 1 + d),
                 transform=ax_right.transAxes, **style)
    ax_right.plot((1 - d, 1 + d), (-d, +d),
                  transform=ax_left.transAxes, **style)
    ax_right.plot((-d * 3 / 2, +d * 3 / 2), (-d, +d),
                  transform=ax_right.transAxes, **style)


def _clone_code_and_space(clones, size):
    """Return ``(CODE, Space)`` for the first clone with ``size`` cells.

    ``CODE`` comes from ``xlsx_tab``; ``Space`` is the most frequent
    ``Space`` value of the clone's cells in ``sum_df``.  Raises
    ``IndexError`` when no clone has that size.
    """
    hits = clones[clones['Clone_size'] == size]
    code = xlsx_tab.set_index('SAMPLE').loc[hits['Sample'], 'CODE'].values[0]
    space = sum_df.set_index('Cell').loc[hits['Cells'].tolist()[0],
                                         'Space'].value_counts().index[0]
    return code, space


def clone_plot(ax1, ax2, ax3, ax4, FSAMPLE=True):
    """Plot clone-size histograms for male (ax1/ax2) and female (ax3/ax4) cells.

    Clones are the connected components of the per-sample graphs built
    from ``male_edis_df`` / ``female_edis_df`` pairs with ``d<10``.  For
    every clone size above 2 one unit-high bar per clone is stacked,
    coloured via ``color_dict`` by sample.  Each gender uses two panels
    that share a broken x-axis (0..15 and 25..115).  A fixed set of clone
    sizes is annotated with the sample ``CODE`` and the most frequent
    ``Space`` of the clone.

    Parameters
    ----------
    ax1, ax2 : matplotlib axes
        Left and right panel for the male clones.
    ax3, ax4 : matplotlib axes
        Left and right panel for the female clones.
    FSAMPLE : bool, default True
        When false, ``'M11'`` is replaced by ``'M10'`` in the code shown
        for the annotated male clone.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(fig_df3, fig_df4)``: the male and female clone tables with
        columns ``Sample``, ``Clone_size`` and ``Cells``.

    Raises
    ------
    IndexError
        If no clone exists for one of the hard-coded annotated sizes.
    """
    fig_df3 = _find_clones(male_edis_df)
    fig_df4 = _find_clones(female_edis_df)

    _stack_clone_bars(fig_df3, ax1, ax2)
    _stack_clone_bars(fig_df4, ax3, ax4)

    left_panels = (ax1, ax3)
    right_panels = (ax2, ax4)
    all_panels = (ax1, ax2, ax3, ax4)

    for axis in left_panels:
        axis.set_xlim(0, 15)
    for axis in right_panels:
        axis.set_xlim(25, 115)
    for axis in left_panels:
        axis.set_xticks(np.arange(0, 20, 5))
    for axis in right_panels:
        axis.set_xticks(np.arange(30, 121, 20))

    for axis in all_panels:
        axis.set_ylim(0, 15)

    for axis in left_panels:
        axis.set_yticks(np.arange(0, 16, 5))
    for axis in right_panels:
        axis.set_yticks([])

    # Hide the facing spines so each pair reads as one broken axis.
    ax1.spines['right'].set_visible(False)
    ax2.spines['left'].set_visible(False)
    ax3.spines['right'].set_visible(False)
    ax4.spines['left'].set_visible(False)

    ax1.set_ylabel('Counts', fontsize=midsize)

    for axis in all_panels:
        axis.tick_params(axis='both', labelsize=smallsize, length=5)

    _draw_break_marks(ax1, ax2)
    _draw_break_marks(ax3, ax4)

    def _annotation_style(**overrides):
        # Fresh dict (and fresh arrowprops) for every annotation.
        style = {
            'fontsize': smallsize,
            'ha': 'center',
            'va': 'center',
            'arrowprops': dict(arrowstyle="->"),
        }
        style.update(overrides)
        return style

    # Male clone of 31 cells.
    lid, cid = _clone_code_and_space(fig_df3, 31)
    if not FSAMPLE:
        lid = lid.replace('M11', 'M10')
    ax2.annotate(lid + '\n' + cid, xy=(31, 1.5), xytext=(50, 5),
                 **_annotation_style())

    # Female clones: (axes, clone size, arrow tip, text position, extras).
    # The size-31 entry is drawn with alpha=0, i.e. fully transparent.
    female_notes = (
        (ax3, 11, (11, 1.5), (7, 10), {'color': two_colors[1]}),
        (ax3, 12, (12, 1.5), (13, 10), {'zorder': 1}),
        (ax3, 13, (13, 1.5), (14, 5), {'zorder': 1}),
        (ax4, 31, (31, 2), (13, 10), {'zorder': 0, 'color': 'w', 'alpha': 0}),
        (ax4, 46, (46, 1.5), (40, 7), {}),
        (ax4, 53, (53, 1.5), (70, 7), {}),
        (ax4, 105, (105, 1.5), (100, 7), {}),
    )
    for axis, size, tip, text_pos, extras in female_notes:
        lid, cid = _clone_code_and_space(fig_df4, size)
        axis.annotate(lid + '\n' + cid, xy=tip, xytext=text_pos,
                      **_annotation_style(**extras))

    return fig_df3, fig_df4
Exemplo n.º 8
0
        '_', expand=True)[0].unique()[0]]
    nx.draw_networkx(
        G,
        pos=pos,
        nodelist=h,
        node_color=[s_color],
        node_size=70,
        edgecolors='k',
        with_labels=False,
        ax=ax,
        zorder=1,
    )
    s = pd.DataFrame(pos).T.loc[h]
    note = sum_df.set_index('Cell').loc[h].groupby(
        ['Space', 'cnvTag'])['Pos'].count().sort_values().index[-1]
    code = xlsx_tab.set_index('SAMPLE').loc[
        s.index.str.split('_', expand=True).to_frame()[0].unique()[0], 'CODE']

    #     if code not in lg_dict:
    #         ax.scatter(xmax+1e2, ymax+1e2, s=70, color=s_color, label=code, zorder=0)
    #         lg_dict[code] = True

    if len(h) > 9:
        ax.text(s.mean()[0] + 85,
                s.mean()[1] - 85,
                cid,
                ha='center',
                va='center',
                fontsize=midsize)
        ax.text(xmax + 200,
                ymax - cid * 100,
                'C{}: {} {} {}'.format(cid, code, note[0], note[1]),
Exemplo n.º 9
0
def kde_plot_chrX(chroms, ax1, ax2, tag):
    """Draw a 2-D KDE of *chroms* vs. chrX with parental-origin points on top.

    The density is a Gaussian KDE fitted on the rows of the selected table
    whose index contains ``'XXX'``, evaluated on a 100x100 grid and drawn
    with ``contourf`` on both axes.  Cells listed in the module-level
    ``XXX_pie_df`` are overlaid as points in three groups (``pA``, ``pB``,
    neither).

    Parameters
    ----------
    chroms : str
        Column plotted on the x-axis; ``'chrX'`` is plotted on the y-axis.
    ax1, ax2 : matplotlib axes
        Both receive the same contours and points; axis limits differ only
        for ``tag == 'reads'``.
    tag : {'size', 'reads'}
        ``'size'`` uses ``snp_per_base`` and a 0..5 grid, ``'reads'`` uses
        ``snp_per_reads`` and a 0..100 grid.

    Returns
    -------
    tuple or None
        ``(cs1, cs2)``, the two ``contourf`` results, or ``None`` when
        *tag* is neither ``'size'`` nor ``'reads'``.
    """
    # Raw string: '\#' in a normal literal is an invalid escape sequence.
    hash_sym = r'\#'

    if tag == 'size':
        fig_df = snp_per_base

        xi, yi = np.mgrid[
            0 : 5 : 100*1j,
            0 : 5 : 100*1j
        ]
        template = r'$\dfrac{{\mathrm{{{}\,of\,SNPs\,reads}}}}{{\mathrm{{Size\,(Mb)}}}}\;\mathrm{{on\;{}}}$'
        x_label = template.format(hash_sym, chroms)
        y_label = template.format(hash_sym, 'chrX')
        ax1.set_ylim(0,1.2)
        ax2.set_ylim(0,1.2)
        ax1.set_yticks([0,0.4,0.8,1.2])
        ax2.set_yticks([0,0.4,0.8,1.2])
        ax1.set_xlim(0,3.6)
        ax2.set_xlim(0,3.6)
        levels = np.arange(0,7)
        extend = 'neither'
    elif tag == 'reads':
        fig_df = snp_per_reads

        xi, yi = np.mgrid[
            0 : 100 : 100*1j,
            0 : 100 : 100*1j
        ]
        template = r'$\dfrac{{\mathrm{{{}\,of\,SNPs\,reads}}}}{{\mathrm{{{}\,reads\,(M)}}}}\;\mathrm{{on\;{}}}$'
        x_label = template.format(hash_sym, hash_sym, chroms)
        y_label = template.format(hash_sym, hash_sym, 'chrX')
        ax1.set_xlim(30,80)
        ax2.set_xlim(30,80)
        ax1.set_ylim(10,35)
        ax2.set_ylim(5,30)
        levels = np.arange(0,0.018,0.002)
        extend = 'max'
    else:
        return

    # The KDE is fitted and evaluated once; both axes show the same density.
    kde_rows = fig_df.index[fig_df.index.str.contains('XXX')]
    k_XXX = gaussian_kde( fig_df.loc[kde_rows, [chroms,'chrX']].T )
    z_XXX = k_XXX( np.vstack([xi.flatten(), yi.flatten()]) )
    density = z_XXX.reshape(xi.shape)

    cs1 = ax1.contourf(xi, yi, density, levels=levels, cmap='Greens', zorder=0, extend=extend)
    cs2 = ax2.contourf(xi, yi, density, levels=levels, cmap='Greens', zorder=0, extend=extend)

    # (query on XXX_pie_df, point colour, legend label)
    point_groups = (
        ('pA==True', two_colors[0], 'Maternal'),
        ('pB==True', two_colors[1], 'Paternal'),
        ('~(pA or pB)', 'gray', 'Undeterminate'),
    )
    points = []
    for expr, color, label in point_groups:
        cells = XXX_pie_df.query(expr).index
        points.append((fig_df.loc[cells, chroms], fig_df.loc[cells, 'chrX'], color, label))

    for axis in (ax1, ax2):
        for xs, ys, color, label in points:
            axis.plot(
                xs,
                ys,
                '.', ms=15, alpha=0.7, markeredgecolor='k', color=color, label=label
            )

    # Only the left panel carries a y label.
    ax1.set_ylabel(y_label, fontsize=midsize)
    ax2.set_ylabel('')

    ax1.set_xlabel(x_label, fontsize=midsize)
    ax2.set_xlabel(x_label, fontsize=midsize)

    ax1.tick_params(length=5, labelsize=smallsize)
    ax2.tick_params(length=5, labelsize=smallsize)

    title = xlsx_tab.set_index('SAMPLE').loc['XXX','CODE']
    ax1.set_title(title,fontsize=hugesize)
    ax2.set_title(title,fontsize=hugesize)

    ax1.legend(loc='upper left', prop={'size':smallsize}, handletextpad=0, frameon=False,)
    ax2.legend(loc='upper left', prop={'size':smallsize}, handletextpad=0, frameon=False,)

    return cs1, cs2
Exemplo n.º 10
0
    Path.CURVE4,
    Path.CLOSEPOLY,
]

np.random.seed(0)  # fixed seed for NumPy's global random generator
# -

# # Get data

max_alpha = 4  # NOTE(review): meaning not visible in this chunk -- confirm at the point of use
max_pie   = 40  # NOTE(review): meaning not visible in this chunk -- confirm at the point of use

# +
good_cells = good_df.xs(key='copy',axis=1,level=1).columns  # column labels left after selecting 'copy' on column level 1

sample_code_dict = xlsx_tab.set_index('SAMPLE')['CODE'].to_dict()  # SAMPLE -> CODE lookup

chrom_dict = (ref.groupby('space').max()['end']//1e6).astype(int).to_dict()  # per 'space': largest 'end', floor-divided by 1e6, as int

two_colors = sns.xkcd_palette(['red', 'bright blue'])  # two-colour palette: red first, bright blue second

def name2code(name):
    """Translate each ``'_'``-separated part of *name* via ``sample_code_dict``.

    Parts that are keys of the module-level ``sample_code_dict`` are
    replaced by their mapped value; all other parts are kept unchanged.
    The parts are joined back together with ``'_'``.
    """
    return '_'.join(
        sample_code_dict.get(part, part) for part in name.split('_')
    )