Python groupby示例，figdata.ref.groupby Python示例

示例#1

0

显示文件

文件： A4_Fig2.py 项目： chAwater/OpenFig

def radar_plot(ax, chrom, FSAMPLE=True):
    """Draw a polar bar chart of per-sample 'Cell' values for one chromosome.

    Reads the module-level ``cnv_chr_counts``, ``ref``, ``xlsx_tab``,
    ``two_colors``, ``smallsize`` and ``midsize``.

    For every sample listed in ``xlsx_tab['SAMPLE']`` the 'Cell' value of
    ``cnv_chr_counts`` for ``chrom`` is taken (0 where the sample has no row)
    and divided by the chromosome's percentage share of the summed
    per-chromosome maximum 'end'.  Values up to 1 are drawn as green bars;
    the part above 1 is drawn compressed by a factor of 10 as a gold bar
    stacked on top of radius 1.

    Args:
        ax: Axes to draw on; polar-only methods such as ``set_rlim`` and
            ``set_thetagrids`` are called, so a polar projection is required.
        chrom: Chromosome name, matched against 'Space' in ``cnv_chr_counts``
            and 'space' in ``ref``.
        FSAMPLE: When False, the sample labelled '$FSAMPLE' is dropped and
            the value 'M11' is replaced by 'M10' before the codes are read.

    Returns:
        None.
    """
    per_chrom = cnv_chr_counts.query('Space==@chrom')

    # Percentage share of this chromosome in the sum of per-chromosome max 'end'.
    chrom_end = ref.groupby('space')['end'].max()
    chrlen = (100*chrom_end/chrom_end.sum()).loc[chrom]

    y_all = (
        per_chrom
        .set_index('Sample')
        .reindex(index=xlsx_tab['SAMPLE'])
        .fillna(0)['Cell']
    )
    y_all = y_all/chrlen

    if FSAMPLE:
        codes = xlsx_tab['CODE'].values
    else:
        y_all = y_all.drop('$FSAMPLE')
        codes = xlsx_tab.query('SAMPLE!="$FSAMPLE"').replace('M11','M10')['CODE'].values

    n_sample = y_all.shape[0]
    # One angular slot per sample; the endpoint 2*pi is excluded because it
    # coincides with slot 0.
    theta = np.linspace(0, 2*np.pi, n_sample+1)[:-1]

    fig_df = pd.DataFrame({'x':theta, 'y':y_all})

    bar_style = dict(width=0.3, edgecolor='k', lw=1.5, alpha=0.9, zorder=5)

    # Part of each value up to 1, drawn at true scale.
    ax.bar(
        fig_df['x'],
        fig_df['y'].clip(upper=1),
        facecolor='limegreen',
        **bar_style
    )

    # Part above 1, compressed 10x and stacked from radius 1.
    overflow = fig_df.query('y>1')
    ax.bar(
        overflow['x'],
        (overflow['y']-1)/10,
        bottom=1,
        facecolor='gold',
        **bar_style
    )

    ax.set_rlim(0,3.7)
    ax.set_rorigin(-0.7)

    # With the 10x compression above, radii 1.9 and 2.9 correspond to 10 and 20.
    _, r_label = ax.set_rgrids([1,1.9,2.9], ['1','10','20'], angle=12, fontsize=smallsize, va='bottom', ha='center')

    r_label[0].set_color('limegreen')
    r_label[1].set_color('darkgoldenrod')
    r_label[2].set_color('darkgoldenrod')

    # Grid lines are placed at the bar angles.  The previous
    # np.linspace(0, 360, n_sample) included both 0 and 360 and therefore used
    # a different spacing from the bars, so grids drifted away from them.
    _, sample_label = ax.set_thetagrids( np.degrees(theta), codes )

    for tpos, label in zip(theta, sample_label):
        # Codes containing 'M' get the first colour and a 90-degree base
        # rotation, all others the second colour and 270 degrees.
        has_m = 'M' in label.get_text()
        label.set_color(two_colors[0] if has_m else two_colors[1])
        label.set_fontsize(smallsize)

        # The stock tick labels are hidden below; the code (without its
        # 'F'/'M' letters) is drawn manually outside the radial limit instead.
        ax.text(
            tpos, 4.5,
            label.get_text().replace('F','').replace('M',''),
            va='center',
            ha='center',
            fontsize=smallsize,
            color=label.get_color(),
            rotation=(90 if has_m else 270)-np.degrees(tpos)
        )

    ax.set_title(chrom, fontsize=midsize, pad=midsize)
    ax.tick_params(grid_color='k', grid_linewidth=1, grid_alpha=1, zorder=0, grid_linestyle='--')
    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)
    ax.set_xticklabels([])

示例#2

0

显示文件

文件： A4_Fig2.py 项目： chAwater/OpenFig

def female_genome_plot(ax1, ax2):
    """Plot genome-wide gain/loss frequency for female samples on two axes.

    Reads the module-level ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``female_fig_df``, ``chr_pos``,
    ``BedTools``, ``tmpCleanUp``, ``smallsize`` and ``midsize``.

    The same two curves are drawn on both axes; ``ax1`` is limited to
    y in [-0.4, 0.4] and ``ax2`` to y in [-4.5, -1.5], and the facing spines
    are hidden (presumably the two halves of a broken y-axis - confirm
    against the caller's layout).

    Args:
        ax1: Axes showing the range around zero; receives the y-label.
        ax2: Axes showing the range around -3.

    Returns:
        None.
    """
    # Denominator: summed 'n_pf' over rows whose GENDER is "female".
    n = pd.concat(
        [
            cells_tab,
            xlsx_tab.set_index('SAMPLE')
        ], axis=1, sort=False
    ).query('GENDER=="female"')['n_pf'].sum()

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="female"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="female"')

    bin_bed  = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Per-bin overlap counts (intersect with c=True), re-indexed like good_df.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']

    tmpCleanUp()

    # Build the curves once; both axes show identical data.
    bins     = female_fig_df.columns
    xs       = np.arange(bins.shape[0])
    gain_pct =  100*gain_s[bins].fillna(0)/n
    loss_pct = -100*loss_s[bins].fillna(0)/n   # losses are plotted downwards

    both_axes = (ax1, ax2)

    for axis in both_axes:
        axis.plot(xs, gain_pct, '-', lw=2, color='darkorange',  label='Gain')
        axis.plot(xs, loss_pct, '-', lw=2, color='deepskyblue', label='Loss')

    # Plot chroms. boundary (positions come from the module-level chr_pos)
    for pos in chr_pos:
        for axis in both_axes:
            axis.plot(np.tile(pos,10), np.linspace(-5, 9, 10), '-', color='black', alpha=0.5, lw=1, zorder=0)

    for axis in both_axes:
        axis.set_xticks([])
        axis.tick_params(axis='x', bottom=False, length=0)
        axis.tick_params(axis='y', length=5)

    ax1.set_ylim(-0.4, 0.4)
    ax2.set_ylim(-4.5, -1.5)

    ax1.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)

    ax1.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Tick labels show magnitudes; the sign is carried by the curve colour.
    ax1.set_yticks([0.3,0,-0.3])
    ax1.set_yticklabels(['0.3','0.0','0.3'], fontsize=smallsize)

    ax2.set_yticks([-3])
    ax2.set_yticklabels(['3'], fontsize=smallsize)

示例#3

0

显示文件

文件： A4_Fig2.py 项目： chAwater/OpenFig

def chr_size_plot(axs, FSAMPLE=True):
    """Scatter per-chromosome CNA cell counts against chromosome size.

    Reads the module-level ``sum_df``, ``big_cnv_idx``, ``ref``, ``sns``,
    ``LinearRegression``, ``smallsize`` and ``midsize``.

    chrY is excluded.  chr21 is highlighted in red on the main axes and chrX
    is drawn on the secondary axes, whose y-range is 280-400.  A
    zero-intercept linear fit over all remaining chromosomes is drawn as a
    dashed line together with its R^2.

    Args:
        axs: Indexable pair of axes; ``axs[0]`` receives the chrX point and
            ``axs[1]`` everything else.  Their facing spines are hidden
            (presumably ``axs[0]`` sits directly above ``axs[1]`` - confirm
            against the caller's layout).
        FSAMPLE: When False, rows whose 'Sample' is '$FSAMPLE' are excluded
            before counting.

    Returns:
        None.
    """
    axx, ax = axs[0], axs[1]

    big_cnv = sum_df.loc[big_cnv_idx]
    if not FSAMPLE:
        big_cnv = big_cnv.query('Sample!="$FSAMPLE"')

    # One row per (Space, Cell, cnvTag) combination, then rows per chromosome.
    chr_counts = (
        big_cnv
        .groupby(['Space','Cell','cnvTag'])['Pos'].first()
        .reset_index()
        .groupby('Space')['Cell'].count()
    )

    # Per-chromosome maximum 'end' divided by 1e6, computed once and reused
    # for the points and for the label positions.
    chr_end_mb = ref.groupby('space').max()['end']/1000/1000

    x = chr_end_mb.drop(['chrY'])
    y = chr_counts.drop(['chrY'], errors='ignore')

    ax.plot(
        x.drop('chr21'), y.drop('chr21'), '.',
        color='black',
        markersize=8,
    )

    ax.plot(
        x['chr21'], y['chr21'], '.',
        color='red',
        markersize=10,
    )

    axx.plot(
        x['chrX'], y['chrX'], '.',
        color='red',
        markersize=10,
    )

    # Hand-tuned (dx, dy) label offsets; chromosomes not listed use the default.
    default_offset = (-5, 4)
    label_offsets = {
        'chr21': (0, -13),
        'chr17': (-5, -10),
        'chr11': (8, 0),
        'chr10': (-8, -3),
        'chr2':  (0, -10),
        'chr18': (8, -2),
        'chr8':  (5, -3),
        'chr9':  (3, 4),
    }

    # The last two entries of ref['space'] are skipped here; chrX gets its
    # own label on axx further down.
    for tchr in ref['space'].unique()[:-2]:
        a, b = label_offsets.get(tchr, default_offset)
        ax.text(
            a + chr_end_mb.loc[tchr],
            b + chr_counts[tchr],
            tchr.replace('chr',''),
            fontsize=10,
            ha='center',
            va='center',
        )

    ax.set_xlim(0,300)
    ax.set_xticks([0,100,200,300])
    ax.set_ylim(0,180)
    ax.set_yticks([0,80,160])

    axx.set_xlim(0,300)

    axx.set_ylim(280,400)
    axx.set_yticks([300,400])

    axx.set_xticks([])
    axx.set_xticklabels('')

    axx.spines['bottom'].set_visible(False)
    ax.spines['top' ].set_visible(False)

    axx.text(
        chr_end_mb.loc['chrX'],
        chr_counts['chrX']-30,
        'X',
        fontsize=10,
        ha='center',
        va='center',
    )

    # chr21 and chrX are left out of the fit.
    x_fit = x.drop(['chr21','chrX'])
    y_fit = y.drop(['chr21','chrX'])

    # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally, while older releases accept both forms.  The scatter and
    # the regression line are made invisible (alpha=0 / lw=0), so only the
    # confidence band is visible from this call.
    sns.regplot(
        x=x_fit, y=y_fit,
        ci=95,
        ax=ax,
        scatter_kws={'color':'w','zorder':0, 'alpha':0},
        line_kws={'lw':0, 'linestyle':'--', 'color':'k','zorder':0, 'alpha':0.5},
    )

    # Zero-intercept fit used for the drawn line and the reported R^2.
    x_fit_2d = x_fit.values.reshape(-1,1)
    model = LinearRegression(fit_intercept=False).fit( x_fit_2d, y_fit )
    r_sq = model.score(x_fit_2d, y_fit)

    line_x = np.linspace(50,250)
    ax.plot(line_x, model.predict( line_x.reshape((-1, 1)) ), '--', lw=1.5, color='k', zorder=0 )

    texts1 = r'$R^2$=' + str(np.round(r_sq,2))
    ax.text(200, 20, texts1, fontsize=smallsize, va='center' )

    ax.tick_params(axis='both', labelsize=smallsize, length=5)
    axx.tick_params(axis='both', labelsize=smallsize, length=5)

    # Leading spaces shift the label text along the axis.
    ax.set_ylabel(' '*8+'# of cells with CNA', fontsize=midsize, labelpad=-5)
    ax.set_xlabel('Chromosome size (Mb)', fontsize=midsize)

示例#4

0

显示文件

文件： A4_Fig2.py 项目： chAwater/OpenFig

def male_genome_plot(ax, FSAMPLE=True):
    """Plot genome-wide gain/loss frequency for male samples.

    Reads the module-level ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``male_fig_df``, ``chr_pos``,
    ``BedTools``, ``tmpCleanUp`` and ``smallsize``/``midsize``.

    Args:
        ax: Axes to draw on.
        FSAMPLE: When False, the sample labelled '$FSAMPLE' is excluded from
            both the CNV tables and the cell-count denominator.

    Returns:
        None.
    """
    male_cells = pd.concat(
        [
            cells_tab,
            xlsx_tab.set_index('SAMPLE')
        ], axis=1, sort=False
    ).query('GENDER=="male"')

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="male"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="male"')

    if not FSAMPLE:
        big_cnv_gain = big_cnv_gain.query('Sample!="$FSAMPLE"')
        big_cnv_loss = big_cnv_loss.query('Sample!="$FSAMPLE"')
        male_cells = male_cells.drop('$FSAMPLE')

    # Denominator: summed 'n_pf' over the remaining male rows.
    n = male_cells['n_pf'].sum()

    bin_bed  = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Per-bin overlap counts (intersect with c=True), re-indexed like good_df.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']

    tmpCleanUp()

    bins = male_fig_df.columns
    xs   = np.arange(bins.shape[0])

    ax.plot(xs,  100*gain_s[bins].fillna(0)/n, '-', lw=2, color='darkorange',  label='Gain', zorder=1)
    # Losses are plotted downwards.
    ax.plot(xs, -100*loss_s[bins].fillna(0)/n, '-', lw=2, color='deepskyblue', label='Loss', zorder=1)

    # Plot chroms. boundary (positions come from the module-level chr_pos)
    for pos in chr_pos:
        ax.plot(np.tile(pos,10), np.linspace(-3, 9, 10), '-', color='black', alpha=0.5, lw=1, zorder=0)

    ax.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Ticks sit midway between consecutive boundaries.
    ax.set_xticks( pd.Series(chr_pos).rolling(2).mean()[1:] )
    # Some labels are padded with leading spaces, which shifts the rotated text.
    ax.set_xticklabels(
        ref['space'].drop_duplicates()[:-1].str.replace('chr','').replace('19','    19').replace('21','    21').replace('X','    X'),
        fontsize=smallsize-4,
        ha='center',
        rotation=90,
    )
    ax.xaxis.set_ticks_position('top')
    ax.set_ylim(-0.4,0.4)
    ax.set_yticks( [-0.3, 0, 0.3] )
    # Tick labels show magnitudes; the sign is carried by the curve colour.
    ax.set_yticklabels( [0.3, 0 ,0.3], fontsize=smallsize )

    ax.tick_params(axis='x', bottom=False, length=0, pad=5)
    ax.tick_params(axis='y', length=5)

示例#5

0

显示文件

文件： A4_Fig2.py 项目： chAwater/OpenFig

    gridspec_kw={'hspace':0.2, 'wspace':0.5},
)

# One radar panel per chromosome; the last entry of ref['space'] is skipped.
axs = axs.flatten()
for n,chrom in enumerate(ref['space'].unique()[:-1]):
    radar_plot(axs[n],chrom, FSAMPLE=FSAMPLE)

# The two trailing grid cells are switched off.
axs[-2].axis('off')
axs[-1].axis('off')

# `frameon` was dropped from savefig: Matplotlib deprecated it in 3.1 and
# removed it in 3.3, where it raises.  transparent=True already yields a
# transparent figure background.
fig.savefig('A4_SI_Radar.pdf', transparent=True, bbox_inches='tight')
plt.show()


# +
# Percentage share of each chromosome in the sum of per-chromosome max 'end'.
chrom_end = ref.groupby('space')['end'].max()
chrlen = (100*chrom_end/chrom_end.sum())

# Normalise row by row via a lookup on 'Space'.  The previous form divided a
# Series indexed by the non-unique 'Space' labels by chrlen and wrote the
# result back positionally with `.values`, which is only correct when index
# alignment happens to keep the original row order.  Rows whose 'Space' is
# absent from chrlen now become NaN instead of raising a length mismatch.
cnv_chr_counts['norm_Cell'] = cnv_chr_counts['Cell']/cnv_chr_counts['Space'].map(chrlen)

# Space x Sample table of normalised values, ordered like ref / xlsx_tab,
# without the '$FSAMPLE' column and the chrY row.
df = cnv_chr_counts.pivot(index='Space',columns='Sample', values='norm_Cell').fillna(0).loc[
    ref['space'].unique(),
    xlsx_tab['SAMPLE'],
].drop('$FSAMPLE',axis=1).drop('chrY')


# Figure size as fractions of an A4 page (8.27 x 11.69 in) times a scale factor.
a4f = 2.5
width_a4f = 0.4
hight_a4f = 0.35

fig, ax = plt.subplots(
    figsize=(a4f*width_a4f*8.27,a4f*hight_a4f*11.69),
)

示例#6

0

显示文件

    'Space=="chr6" & Sample=="XXX" & chrom_fraction>15 & chrom_fraction<50'
)['Cell'].unique()

# Stack one single-cell panel per clone cell in the right-hand grid columns,
# three grid rows per panel; `lax` remembers the first (top-most) panel.
lax = None
for i, n in enumerate(np.arange(1, 30, 3)):
    ax_cell = fig.add_subplot(gs[n:n + 3, 20:])
    showCell(clone_cells[i], give_ax=[ax_cell], alpha=0.5, ms=1)
    ax_cell.set_ylabel('')
    ax_cell.yaxis.set_ticks_position('right')
    ax_cell.tick_params(axis='y', labelsize=smallsize, length=5)
    lax = ax_cell if lax is None else lax

# Chromosome ticks on the first panel: midway between consecutive
# chromosome start positions.
chrom_starts = ref.groupby('space', sort=False)['abs_pos'].min()
lax.set_xticks(chrom_starts.rolling(2).mean()[1:])

# Some labels are padded with leading spaces, which shifts the rotated text.
padded_labels = {
    '17': '    17',
    '19': '    19',
    '21': '     21',
    'X': '    X',
}
chrom_labels = ref['space'].drop_duplicates()[:-1].str.replace('chr', '')
lax.set_xticklabels(
    chrom_labels.replace(padded_labels),
    fontsize=smallsize,
    ha='center',
    rotation=90,
)
lax.xaxis.set_ticks_position('top')
lax.tick_params(axis='x', bottom=False, length=0, pad=5)
lax.set_title('F01 chr6 clonal CNA cells', fontsize=midsize, pad=-5)

fig.subplots_adjust(hspace=10, wspace=0.4)

示例#7

0

显示文件

def ideoPlot(chrom,ax):
    '''
    Plot ideo for one chromosome.

    Reads the module-level ``ref``, ``ideo``, ``rf``, ``tf``, ``bf``, ``mf``,
    ``codes``, ``color_dict``, ``chrom_dict`` and ``midsize``.

    Every row of ``ideo`` for ``chrom`` becomes one patch between y=``bf`` and
    y=``tf``: the first and the last row are drawn as paths that extend
    ``rdis`` beyond the band, rows whose ``gieStain`` is 'acen' as triangles
    pointing at y=``mf``, and all other rows as rectangles.

    Args:
        chrom: Chromosome name; must exist in ``ref['space']``,
            ``ideo['#chrom']`` and ``chrom_dict``.
        ax: Axes to draw on.

    Returns:
        The same ``ax``.
    '''
    # Offset added to the band coordinates to place them on the abs_pos axis.
    chr_base = ref.groupby('space').min().loc[chrom,'abs_pos']
    # Length of the cap extension, proportional to the chromosome's max 'end'.
    rdis     = ref.groupby('space').max().loc[chrom,'end']*rf

    # Select the bands once instead of on every iteration.  The list selector
    # keeps the result a DataFrame even when only one row matches.
    bands    = ideo.set_index('#chrom').loc[[chrom]].reset_index()
    last_idx = bands.shape[0]-1
    patch_kw = dict(edgecolor='black', lw=2, zorder=1)

    # 1: no 'acen' band seen yet, 0: one seen, -1: both seen.
    tag_acen = 1

    for idx, row in bands.iterrows():
        start    = row['chromStart'] + chr_base
        end      = row['chromEnd']   + chr_base
        gieStain = row['gieStain']

        if idx == 0:
            # First band: extended to the left by rdis.
            ax.set_xlim(start-rdis, ax.get_xlim()[1])
            v1 = [
                ( end,        tf ),
                ( end,        bf ),
                ( start,      bf ),
                ( start-rdis, bf ),
                ( start-rdis, tf ),
                ( start,      tf ),
                ( end,        tf ),
            ]
            p = PathPatch( Path(v1, codes), facecolor=color_dict[gieStain], **patch_kw )
        elif idx == last_idx:
            # Last band: extended to the right by rdis.
            ax.set_xlim(ax.get_xlim()[0], end+rdis)
            v2 = [
                ( start,      bf ),
                ( start,      tf ),
                ( end,        tf ),
                ( end+rdis,   tf ),
                ( end+rdis,   bf ),
                ( end,        bf ),
                ( start,      bf ),
            ]
            p = PathPatch( Path(v2, codes), facecolor=color_dict[gieStain], **patch_kw )
        elif gieStain == 'acen':
            if tag_acen == 1:
                # First 'acen' band: triangle with its tip at the band end.
                triangle = [
                    (start, bf),
                    (start, tf),
                    (end,   mf)
                ]
                tag_acen = 0
            elif tag_acen == 0:
                # Second 'acen' band: mirrored, tip at the band start.
                triangle = [
                    (end,   bf),
                    (end,   tf),
                    (start, mf)
                ]
                tag_acen = -1
            else:
                # A third 'acen' band is unexpected; report it and skip it.
                print ('Bug at acen!')
                continue
            p = Polygon( triangle, facecolor=color_dict[gieStain], **patch_kw )
        else:
            p = Rectangle( (start,bf), end-start, height=(tf-bf), facecolor=color_dict[gieStain], **patch_kw )

        ax.add_patch(p)

    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)

    ax.tick_params(bottom=False, length=2, pad=-20)
    ax.set_ylim(-4,1)
    ax.set_yticks([])

    # Tick positions are multiples of `step` scaled by 1e6 and shifted by
    # chr_base; the labels show the unscaled multiples.
    chrom_bins = chrom_dict[chrom]
    boundary = max(2, chrom_bins//50)
    step = (chrom_bins+boundary)//5
    tick_vals = np.arange( 0, (chrom_bins+boundary)+step, step )

    ax.set_xticks( chr_base + tick_vals*1e6 )
    ax.set_xticklabels( tick_vals, fontsize=midsize )

    # Final x-range; this overrides the limits set while drawing the caps.
    ax.set_xlim(chr_base-boundary*1e6,chr_base+(chrom_bins+boundary)*1e6)

    return ax

示例#8

0

显示文件

]

# Fix NumPy's global random seed.
np.random.seed(0)
# -

# # Get data

# Module-level constants; their consumers are not visible in this section.
max_alpha = 4
max_pie   = 40

# +
# Column labels of the 'copy' cross-section (column level 1) of good_df.
good_cells = good_df.xs(key='copy',axis=1,level=1).columns

# Mapping from xlsx_tab 'SAMPLE' values to their 'CODE' values.
sample_code_dict = xlsx_tab.set_index('SAMPLE')['CODE'].to_dict()

# Per-chromosome maximum 'end', floor-divided by 1e6, as ints
# (presumably chromosome length in Mb - assumes 'end' is in bp; TODO confirm).
chrom_dict = (ref.groupby('space').max()['end']//1e6).astype(int).to_dict()

# Two-colour palette: index 0 is xkcd 'red', index 1 is xkcd 'bright blue'.
two_colors = sns.xkcd_palette(['red', 'bright blue'])

def name2code(name):
    """Translate the '_'-separated parts of ``name`` through ``sample_code_dict``.

    Each part that is a key of the module-level ``sample_code_dict`` is
    replaced by its mapped value; every other part is kept unchanged.  The
    parts are joined with '_' again.
    """
    return '_'.join(
        sample_code_dict.get(part, part) for part in name.split('_')
    )


# -