# Determine number of entries per gene
    # Annotation rows for this gene (case-insensitive name match).
    gene = colicogs[colicogs['gene_name'].str.lower()==g.lower()]
    # Read the annotation fields only after confirming the lookup matched:
    # `[0]` on an empty selection raises IndexError, which previously fired
    # before the emptiness check below could run.
    if len(gene) > 0:
        b_number = gene['b_number'].unique()[0]
        cog_class = gene['cog_class'].values[0]
        cog_cat = gene['cog_category'].values[0]
        cog_letter = gene['cog_letter'].values[0]
        gene_product = gene['gene_product'].values[0]
        mw = gene['mw_fg'].values[0]
        # All distinct GO terms for the gene, joined into one ';'-separated string.
        go_term = ';'.join(gene['go_terms'].unique())
        # Group on the bare column name: with a one-element list as the key,
        # pandas >= 2.0 yields 1-tuples, so `_c` would no longer be the
        # growth-rate value passed to `size.lambda2size` and stored below.
        for _c, _d in d.groupby('growth_rate_hr-1'):
            # volume predictions based on MG1655 data, Si, F. et al. (2017, 2019)
            vol = size.lambda2size(_c)
            # First reported per-fL copy number in this growth-rate group.
            per_fl = _d['copy_number_molecule-per-fL'].values[0]
            # extract relevant information.
            gene_dict = {
                'gene_name': g.lower(),
                'b_number': b_number,
                'condition': _d['condition'].unique()[0],
                'corrected_volume': vol,
                'reported_tot_per_cell': per_fl * vol,
                'reported_fg_per_cell': per_fl * vol * mw,
                'go_terms':go_term,
                'cog_class': cog_class,
                'cog_category': cog_cat,
                'cog_letter': cog_letter,
                'gene_product': gene_product,
                'growth_rate_hr': _c
                }
# Plot the max for respiration: one curve per cell shape. The two curves
# differ only in their data, legend label and dash pattern.
for _xs, _ys, _label, _dash in (
        (Ps_resp_rod, SA_V_ratio_rod, 'rod', '-.'),
        (Ps_resp_sphere, SA_V_ratio_sphere, 'sphere', '--')):
    ax1.plot(_xs, _ys, color=colors['blue'], label=_label,
             alpha=0.9, lw=0.5, ls=_dash)

# Shade the band between the sphere and rod curves.
ax1.fill_between(Ps_resp_,
                 y1=SA_V_ratio_sphere_,
                 y2=SA_V_ratio_rod_,
                 color=colors['blue'],
                 alpha=0.2,
                 lw=0)

# # Populate second plot with growth rates
# S/V for E. coli datasets
# Load the data set
data = pd.read_csv('../../data/compiled_absolute_measurements.csv')

# Marker styling shared by every plotted point.
_point_style = dict(alpha=0.75, markeredgecolor='k', markeredgewidth=0.25,
                    ms=4, zorder=10)

# One point per (dataset, condition, growth_rate_hr) group; `g` is that key.
for g, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    # Cell volume estimated from the group's growth rate.
    V = size.lambda2size(g[2])
    # ATP equivalents demand w.r.t. volume ; 1E6 ATP/(um3 s)
    Pv = 1E6 * V
    # assume aspect ratio of 4 (length/width), which is
    # approximately correct for E. coli
    SA_rod = 2 * np.pi * V**(2/3)
    SV = SA_rod / V

    ax1.plot(Pv, SV, 'o',
             color=dataset_colors[g[0]],
             label=g[2],
             **_point_style)

# Format the axes: small tick labels on both x and y.
for a in [ax1]:  # ,ax2]:
    for _axis in (a.xaxis, a.yaxis):
        _axis.set_tick_params(labelsize=5)
示例#3
0
    # Growth rate and condition taken from the first row of the Schmidt frame.
    gr_schmidt = d_schmidt_['growth_rate_hr'].values[0]
    cond_schmidt = d_schmidt_['condition'].values[0]

    # Keep only the Schmidt rows that match both of those values.
    _same_rate = d_schmidt_['growth_rate_hr'] == gr_schmidt
    _same_cond = d_schmidt_['condition'] == cond_schmidt
    d_schmidt_ = d_schmidt_[_same_rate & _same_cond]

    # Share of the Schmidt fg_per_cell total carried by genes whose b_number
    # also appears in `d`.
    _shared = d_schmidt_['b_number'].isin(d['b_number'].unique())
    schmidt_genes = d_schmidt_[_shared]
    rel_schmidt = (schmidt_genes['fg_per_cell'].sum()
                   / d_schmidt_['fg_per_cell'].sum())

    # Rows of `df` at growth rate `g`; reused for every read and write below.
    _rows_g = df['growth_rate_hr'] == g

    # Summed reported fg at this growth rate relative to size.lambda2P(g).
    rel_corr_fg = (df.loc[_rows_g, 'reported_fg_per_cell'].sum()
                   / size.lambda2P(g))

    print(g, ': total mass fg: ', np.round(size.lambda2P(g), 2), ' volume: ',
          np.round(size.lambda2size(g), 2), ' relative change in total fg: ',
          np.round(1 / rel_corr_fg, 2), ' abundance relative to Schmidt: ',
          np.round(rel_schmidt, 2))

    # Rescale the reported values: divide by rel_corr_fg, then multiply by
    # rel_schmidt (same operation order for both columns).
    for _src, _dst in (('reported_tot_per_cell', 'tot_per_cell'),
                       ('reported_fg_per_cell', 'fg_per_cell')):
        df.loc[_rows_g, _dst] = \
            (df.loc[_rows_g, _src] / rel_corr_fg) * rel_schmidt

#%%
# Tag every row with constant provenance columns, then write the table out.
for _column, _value in (('dataset', 'valgepea_2013'),
                        ('dataset_name', 'Valgepea et al. 2013'),
                        ('strain', 'MG1655')):
    df[_column] = _value
df.to_csv('../../../data/valgepea2013_longform_annotated.csv')
# %%
示例#4
0
# Load the complex subunit counts.
subunits = pd.read_csv('../../data/compiled_annotated_complexes.csv')

# # Load the compiled data
data = pd.read_csv('../../data/compiled_absolute_measurements.csv')

# Average `n_units` within each dataset / condition / growth rate / complex.
# NOTE(review): the earlier comment here said "minimum number of complexes",
# but the aggregation is a mean -- confirm which one is intended.
_complex_keys = [
    'dataset', 'dataset_name', 'condition', 'growth_rate_hr',
    'complex_annotation', 'complex'
]
_mean_units = subunits.groupby(_complex_keys)['n_units'].mean()
complex_count = _mean_units.reset_index()

# Keep only the rows annotated as ribosome.
_is_ribosome = complex_count['complex_annotation'] == 'ribosome'
complex_ribo = complex_count[_is_ribosome]

# Marker styling shared by every data set.
_marker_kwargs = dict(alpha=0.75, markeredgecolor='k', markeredgewidth=0.25,
                      ms=4, zorder=10)

# One series per data set; `g` is the (dataset, dataset_name) group key.
for g, d in complex_ribo.groupby(['dataset', 'dataset_name']):
    _volume = size.lambda2size(d['growth_rate_hr'])
    ax.plot(_volume, d['n_units'], 'o',
            color=dataset_colors[g[0]],
            label=g[1],
            **_marker_kwargs)

# Axis titles, small tick labels, and the legend.
ax.set_xlabel('estimated cell volume [fL]', fontsize=6)
ax.set_ylabel('ribosomes per cell', fontsize=6)
for _axis in (ax.xaxis, ax.yaxis):
    _axis.set_tick_params(labelsize=5)
ax.legend(fontsize=6, loc='upper left')
示例#5
0
         # Reported volume for condition `c` (first matching row of `rates`).
         reported_volume = rates.loc[rates['condition'] == c,
                                     'volume_fL'].values[0]
         # First reported total count for this condition's column.
         _tot = d[f'{c}_tot'].values[0]
         # One record for this gene/condition; key order is kept as before.
         gene_dict = {
             'gene_name': g,
             'b_number': b_number,
             'condition': c,
             'reported_tot_per_cell': _tot,
             'reported_fg_per_cell': _tot * mw,
             'go_terms': go_term,
             'cog_class': cog_class,
             'cog_category': cog_cat,
             'cog_letter': cog_letter,
             'growth_rate_hr': growth_rate,
             'gene_product': gene_product,
             'reported_volume': reported_volume,
             'corrected_volume': size.lambda2size(growth_rate)
         }
         # Collect the record as a single-row frame.
         _row = pd.DataFrame(gene_dict, index=[0])
         dfs.append(_row)
 else:
     print(f'Warning!!! {g} not found in the gene list!')
     for c in conditions:
         growth_rate = rates.loc[rates['condition'] ==
                                 c]['growth_rate_hr'].values[0]
         reported_volume = rates.loc[rates['condition'] ==
                                     c]['volume_fL'].values[0]
         gene_dict = {
             'gene_name': g[0],
             'b_number': g[1],
             'condition': c,
             'reported_tot_per_cell': d[f'{c}_tot'].values[0],
             'reported_fg_per_cell': d[f'{c}_fg'].values[0],
示例#6
0
               label=g[1],
               ms=4,
               zorder=10)

# Axis titles, small tick labels, and legend for the first panel.
_left = ax[0]
_left.set_xlabel('estimated # ori', fontsize=6)
_left.set_ylabel('ribosomes per cell', fontsize=6)
for _axis in (_left.xaxis, _left.yaxis):
    _axis.set_tick_params(labelsize=5)
_left.legend(fontsize=6, loc='upper left')

# Ratio of the largest to the smallest `n_units` value in complex_ribo.
_ribo_units = complex_ribo['n_units']
print(_ribo_units.max() / _ribo_units.min())

## Plot of ribosome concentration
# Marker styling shared by every data set.
_conc_marker = dict(alpha=0.75, markeredgecolor='k', markeredgewidth=0.25,
                    ms=4, zorder=10)

# One series per data set; `g` is the (dataset, dataset_name) group key.
for g, d in complex_ribo.groupby(['dataset', 'dataset_name']):
    _rate = d['growth_rate_hr']
    # n_units divided by the volume estimated from the growth rate.
    _per_volume = d['n_units'] / size.lambda2size(_rate)
    ax[1].plot(_rate, _per_volume, 'o',
               color=dataset_colors[g[0]],
               label=g[1],
               **_conc_marker)

# Axis titles, small tick labels, and a fixed y-range for the second panel.
_right = ax[1]
_right.set_xlabel('growth rate [hr$^{-1}$]', fontsize=6)
_right.set_ylabel('ribosome concentration [fL$^{-1}$]', fontsize=6)
for _axis in (_right.xaxis, _right.yaxis):
    _axis.set_tick_params(labelsize=5)
_right.set_ylim(0, 40000)
# ax[1].legend(fontsize=6, loc = 'upper left')