Exemplo n.º 1
0
def get_kinetic_param(name, value_col, organism='Escherichia coli'):
    """Load a cached kinetic-parameter table and keep only the usable rows.

    Parameters
    ----------
    name : str
        Name of the cached table, passed to ``S.read_cache``.
    value_col : str
        Name of the column that holds the quantitative value.
    organism : str, optional
        Only rows whose 'Organism' column equals this value are kept.

    Returns
    -------
    pandas.DataFrame
        The columns 'EC_number', 'bigg.metabolite' and ``value_col`` of the
        rows that survive all filters.
    """
    k = S.read_cache(name)

    # filter by organism
    k = k[k['Organism'] == organism]

    # filter out mutated enzymes. A substring test is used instead of
    # ``str.find(...) > 0``, which missed commentaries that *start* with the
    # keyword (find() returns 0 there and -1 when nothing is found).
    # ``na=False`` keeps the rows that have no commentary at all.
    is_mutant = k['Commentary'].str.contains('mutant|mutation', na=False)
    k = k[~is_mutant]

    # remove values with unmatched ligand
    k = k[pd.notnull(k['bigg.metabolite'])]

    # remove entries lacking quantitative data (non-positive or missing values)
    k = k[k[value_col] > 0]

    return k[['EC_number', 'bigg.metabolite', value_col]]
Exemplo n.º 2
0
    def get_km_data(self):
        """Build a table of E. coli Km values matched to model reactions.

        Reads the cached 'km' table, takes the median Km per
        (EC number, metabolite) pair, maps EC numbers to reaction IDs (each
        reaction also appears with a '_reverse' suffix) and joins the result
        with the metabolites that take part in each reaction of
        ``self.cobra_model``.

        Returns
        -------
        pandas.DataFrame
            Columns 'EC_number', 'bigg.metabolite', 'bigg.reaction',
            'stoichiometry' and 'Km [M]'. No filtering on the sign of the
            stoichiometric coefficient is done here; it is returned so that
            callers can tell substrates from products.
        """
        km_df = S.read_cache('km')
        km_df = km_df[km_df['Organism'] == 'Escherichia coli']
        km_df = km_df[km_df['KM_Value'] != -999]
        km_df = km_df[['EC_number', 'KM_Value', 'bigg.metabolite']]
        # the group keys must be a list: newer pandas treats a tuple as ONE key
        km_df = km_df.groupby(['EC_number', 'bigg.metabolite']).median().reset_index()

        # some compounds have specific stereochemistry in BRENDA, but not in the
        # E. coli model (and other datasets). Therefore, we need to map them to
        # the stereo-unspecific BiGG IDs in order to join the tables later
        stereo_mapping = {'fdp_B_c': 'fdp_c', 'f6p_B_c': 'f6p_c'}
        # assign the result back instead of replacing in place on a column
        # selection, which is not guaranteed to modify km_df itself
        km_df['bigg.metabolite'] = km_df['bigg.metabolite'].replace(stereo_mapping)

        # get a mapping from EC numbers to bigg.reaction,
        # remember we need to duplicate every reaction ID also for the reverse
        # reaction (since we use a model that is converted to irreversible)
        model_reactions = S.get_reaction_table_from_xls()
        bigg2ec = model_reactions[['Reaction Abbreviation', 'EC Number']].rename(
            columns={'Reaction Abbreviation': 'bigg.reaction',
                     'EC Number': 'EC_number'})
        # explicit copy so the column assignment below does not hit a slice
        bigg2ec = bigg2ec[pd.notnull(bigg2ec['EC_number'])].copy()
        bigg2ec['bigg.reaction'] = bigg2ec['bigg.reaction'].str.lower()

        bigg2ec_rev = bigg2ec.copy()
        bigg2ec_rev['bigg.reaction'] = bigg2ec_rev['bigg.reaction'] + '_reverse'
        bigg2ec = pd.concat([bigg2ec, bigg2ec_rev], axis=0)

        # get the stoichiometric coefficients of every reaction in the model so
        # that Km values are only matched to metabolites taking part in it
        # (.items() works on both Python 2 and 3, unlike .iteritems())
        stoich_rows = [[r.id.lower(), m.id, coeff]
                       for r in self.cobra_model.reactions
                       for m, coeff in r.metabolites.items()]
        stoich_df = pd.DataFrame(columns=['bigg.reaction', 'bigg.metabolite', 'stoichiometry'],
                                 data=stoich_rows)

        km_df = pd.merge(km_df, bigg2ec, on='EC_number')
        km_df = pd.merge(km_df, stoich_df, on=['bigg.reaction', 'bigg.metabolite'])
        km_df['Km [M]'] = km_df['KM_Value'] * 1e-3  # convert mM to M
        km_df = km_df.drop('KM_Value', axis=1)

        return km_df
Exemplo n.º 3
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jun  9 17:11:08 2016

@author: noore

This script will count the number of interactions per EC-metabolite pair
across all organisms.
"""
import settings as S
import pandas as pd
import os
import numpy as np

# Cached inhibition (Ki) and activation tables
ki = S.read_cache('ki')
act = S.read_cache('activating')

# Number of non-null 'Organism' entries per (EC number, compound) pair
ki_merge = ki.groupby(['EC_number', 'Compound'])
ki_count = ki_merge.count()['Organism'].reset_index()

act_merge = act.groupby(['EC_number', 'Compound'])
act_count = act_merge.count()['Organism'].reset_index()

# Write the merged files with some extra fun info
# (materialised as a list: on Python 3 a bare zip() is a one-shot iterator
# that would be empty after the comprehension below)
ki_idx = list(zip(ki_count['EC_number'], ki_count['Compound']))
# ki_merge.groups maps each group key to the index labels of its rows, so the
# label-based .loc is the replacement for the removed .ix indexer. Null
# organisms are dropped to match count() above, which ignores them too.
ki_count['UniqueOrganisms_Ki'] = [
    ','.join(np.unique(ki.loc[ki_merge.groups[item], 'Organism'].dropna()))
    for item in ki_idx
]

act_idx = list(zip(act_count['EC_number'], act_count['Compound']))
Exemplo n.º 4
0
import os
import pandas as pd
import scipy
import settings as S
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): axes_style() only returns a style dictionary; called like this
# it does not change the global plotting style. If the white grid is wanted
# for the figures, sns.set_style('whitegrid') is probably what was intended.
sns.axes_style('whitegrid')

organism = 'Escherichia coli'

# DataFrame.from_csv() has been removed from pandas. read_csv() with
# index_col=0 keeps the first column as the index; the date parsing that
# from_csv() attempted on the index is left out since the index is used as
# metabolite IDs below.
_df = pd.read_csv(S.ECOLI_METAB_FNAME, index_col=0)
_df.index.name = 'bigg.metabolite'
# NOTE(review): the column positions below are taken as-is from the original
# script; the layout of the file is not visible here - confirm against it.
met_conc_mean = _df.iloc[:, 1:9]
met_conc_std = _df.iloc[:, 10:]

km = S.read_cache('km')
ki = S.read_cache('ki')

# keep only the entries of the organism of interest
km = km[km['Organism'] == organism]
ki = ki[ki['Organism'] == organism]

# discard the rows whose value is the -999 placeholder
km = km[km['KM_Value'] != -999]
ki = ki[ki['KI_Value'] != -999]

# median Km / Ki per metabolite
km_median = km.groupby('bigg.metabolite')['KM_Value'].median().reset_index()
ki_median = ki.groupby('bigg.metabolite')['KI_Value'].median().reset_index()

# the merge key is spelled out; it is the only column the two tables share
data = pd.merge(km_median, ki_median, on='bigg.metabolite').join(
    met_conc_mean, on='bigg.metabolite')
data.set_index('bigg.metabolite', inplace=True)

# metabolites for which every column has a value
concensus = data[~data.isnull().any(axis=1)]
import os
import pandas as pd
import scipy
import settings as S
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): axes_style() only returns a style dictionary; called like this
# it does not change the global plotting style. If the white grid is wanted
# for the figures, sns.set_style('whitegrid') is probably what was intended.
sns.axes_style('whitegrid')

organism = 'Escherichia coli'

# DataFrame.from_csv() has been removed from pandas. read_csv() with
# index_col=0 keeps the first column as the index; the date parsing that
# from_csv() attempted on the index is left out since the index is used as
# metabolite IDs below.
_df = pd.read_csv(S.ECOLI_METAB_FNAME, index_col=0)
_df.index.name = 'bigg.metabolite'
# NOTE(review): the column positions below are taken as-is from the original
# script; the layout of the file is not visible here - confirm against it.
met_conc_mean = _df.iloc[:, 1:9]
met_conc_std = _df.iloc[:, 10:]

km = S.read_cache('km')
ki = S.read_cache('ki')

# keep only the entries of the organism of interest
km = km[km['Organism'] == organism]
ki = ki[ki['Organism'] == organism]

# discard the rows whose value is the -999 placeholder
km = km[km['KM_Value'] != -999]
ki = ki[ki['KI_Value'] != -999]

# median Km / Ki per metabolite
km_median = km.groupby('bigg.metabolite')['KM_Value'].median().reset_index()
ki_median = ki.groupby('bigg.metabolite')['KI_Value'].median().reset_index()

# the merge key is spelled out; it is the only column the two tables share
data = pd.merge(km_median, ki_median, on='bigg.metabolite').join(
    met_conc_mean, on='bigg.metabolite')
data.set_index('bigg.metabolite', inplace=True)

# metabolites for which every column has a value
concensus = data[~data.isnull().any(axis=1)]
Exemplo n.º 6
0
@author: noore
"""
import settings
import pandas as pd
import os
import wesci

logger = wesci.Logger(script_file=__file__, log_file_prefix="./prefix")

# register the cached regulation table as an input of this script
logger.add_input_files(
    {'regulation': os.path.join(settings.CACHE_DIR, 'regulation.csv')})

ORGANISM = 'Escherichia coli'

#%%
ki_df = settings.read_cache('regulation')
# keep only the rows whose mode is '-'
ki_df = ki_df[ki_df['Mode'] == '-']
# Turn the -999 placeholder into NaN. mask() is used instead of
# replace(-999, None, inplace=True): in older pandas releases an explicit
# None value made replace() forward-fill from the previous row, and the
# in-place call acted on a column of a filtered frame, which is not
# guaranteed to modify ki_df itself.
ki_df = ki_df.assign(KI_Value=ki_df['KI_Value'].mask(ki_df['KI_Value'] == -999))
ki_df = ki_df[ki_df['Organism'] == ORGANISM]
# columns that are not needed downstream; dropped without inplace=True
# because ki_df is a filtered slice at this point
ki_df = ki_df.drop(
    ['Organism', 'Compound', 'LigandID', 'LigandName', 'Mode', 'Mechanism'],
    axis=1)
ki_df = ki_df.drop_duplicates()

ccm_df = pd.read_csv(settings.ECOLI_CCM_FNAME, index_col=None)
ccm_df.set_index('EC_number', inplace=True)

# select only Ki values that involve CCM enzymes
ccm_inh = ki_df.join(ccm_df, on='EC_number', how='inner')
ccm_inh['type'] = 'KI'
Exemplo n.º 7
0

def literaturestring(subdf):
    """Summarize the literature references found in ``subdf['Literature']``.

    The entries are joined with ';', every space is removed and the result is
    split on ';' again to obtain the individual references.

    Returns a tuple ``(count, refs)`` with the number of unique references
    and the sorted unique references joined by ';'.
    """
    merged = ';'.join(subdf['Literature']).replace(' ', '')
    unique_refs = np.unique(merged.split(';'))
    joined = ';'.join(unique_refs)
    return len(unique_refs), joined


# Analysis parameters
tax2use, minsize = 'kingdom', 10

# Central carbon metabolism reactions: keep the original index in the
# 'EcoliGene' column and index the table by its 'EC' column instead
ccm = S.read_cache('CCM_Reactions')
ccm.loc[:, 'EcoliGene'] = ccm.index
ccm.set_index('EC', drop=False, inplace=True)

# Regulation table restricted to BRENDA entries
# (don't bias with just ecocyc/excluding remainder of biocyc)
reg = S.read_cache('regulation')
reg = reg.loc[reg['Source'] == 'BRENDA']

# Split by mode: '-' rows go to ki, '+' rows go to act
ki = reg.loc[reg['Mode'] == '-']
act = reg.loc[reg['Mode'] == '+']

# Earlier data sources, kept for reference:
#ki = S.get_data_df('inhibiting')
#act = S.read_cache('activating')
tax = S.read_cache('TaxonomicData')  # was TaxonomicData_temp
@author: noore
"""
import settings
import pandas as pd
import os
import wesci
logger = wesci.Logger(script_file=__file__, log_file_prefix="./prefix")

# register the cached regulation table as an input of this script
logger.add_input_files({'regulation':
    os.path.join(settings.CACHE_DIR, 'regulation.csv')})

ORGANISM = 'Escherichia coli'

#%%
ki_df = settings.read_cache('regulation')
# keep only the rows whose mode is '-'
ki_df = ki_df[ki_df['Mode'] == '-']
# Turn the -999 placeholder into NaN. mask() is used instead of
# replace(-999, None, inplace=True): in older pandas releases an explicit
# None value made replace() forward-fill from the previous row, and the
# in-place call acted on a column of a filtered frame, which is not
# guaranteed to modify ki_df itself.
ki_df = ki_df.assign(KI_Value=ki_df['KI_Value'].mask(ki_df['KI_Value'] == -999))
ki_df = ki_df[ki_df['Organism'] == ORGANISM]
# columns that are not needed downstream; dropped without inplace=True
# because ki_df is a filtered slice at this point
ki_df = ki_df.drop(['Organism', 'Compound', 'LigandID', 'LigandName', 'Mode', 'Mechanism'],
                   axis=1)
ki_df = ki_df.drop_duplicates()

ccm_df = pd.read_csv(settings.ECOLI_CCM_FNAME, index_col=None)
ccm_df.set_index('EC_number', inplace=True)

# select only Ki values that involve CCM enzymes
ccm_inh = ki_df.join(ccm_df, on='EC_number', how='inner')
ccm_inh['type'] = 'KI'
ccm_inh.sort_values(['EC_number', 'bigg.metabolite', 'type'], inplace=True)
    # summarizes the entries in subdf
    return ';'.join([item +':' + str(subdf.ix[item]) for item in subdf.index])

def literaturestring(subdf):
    """Summarize the literature references in ``subdf['Literature']``.

    Returns ``(n_unique, joined)``: the number of unique references and the
    sorted unique references joined by ';'. References are obtained by
    joining all entries with ';', removing every space and splitting on ';'.
    """
    all_refs = ';'.join(subdf['Literature'])
    without_spaces = all_refs.replace(' ', '')
    references = np.unique(without_spaces.split(';'))
    n_unique = len(references)
    return n_unique, ';'.join(references)

# Analysis parameters
tax2use, minsize = 'kingdom', 10

# Central carbon metabolism reactions: keep the original index in the
# 'EcoliGene' column and index the table by its 'EC' column instead
ccm = S.read_cache('CCM_Reactions')
ccm.loc[:, 'EcoliGene'] = ccm.index
ccm.set_index('EC', drop=False, inplace=True)

# Regulation table restricted to BRENDA entries
# (don't bias with just ecocyc/excluding remainder of biocyc)
reg = S.read_cache('regulation')
reg = reg.loc[reg['Source'] == 'BRENDA']

# Split by mode: '-' rows go to ki, '+' rows go to act
ki = reg.loc[reg['Mode'] == '-']
act = reg.loc[reg['Mode'] == '+']

# Earlier data sources, kept for reference:
#ki = S.get_data_df('inhibiting')
#act = S.read_cache('activating')
tax = S.read_cache('TaxonomicData')  # was TaxonomicData_temp

# Keep only the inhibition entries that name an organism
ki = ki[ki['Organism'].notnull()]
Exemplo n.º 10
0
import settings as S
import pandas as pd
import os
import numpy as np
import pdb
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
# interactive plotting, starting from a clean slate of figures
plt.ion()
plt.close('all')

# Minimum number of interactions required to print data
minval = 3


ki = S.read_cache('inhibiting')
act = S.read_cache('activating')
tax = S.read_cache('TaxonomicData_temp')

# Drop entries without organism or without ligand ID.
# The null ligand IDs have to be removed BEFORE the string conversion below:
# astype(str) turns NaN into the literal string 'nan', after which a
# notnull() filter no longer removes anything. The explicit copy avoids
# assigning into a filtered slice.
ki = ki[pd.notnull(ki['Organism']) & pd.notnull(ki['LigandID'])].copy()
act = act[pd.notnull(act['Organism']) & pd.notnull(act['LigandID'])].copy()

# Convert LigandID to string
ki['LigandID'] = ki['LigandID'].astype(str)
act['LigandID'] = act['LigandID'].astype(str)
Exemplo n.º 11
0
import numpy as np
from matplotlib_venn import venn3

def venn3_sets(set_a, set_b, set_c, set_labels, ax):
    """Draw a three-set Venn diagram from three set-like objects.

    The sizes of the seven regions are computed with the ``union``,
    ``intersection`` and ``difference`` methods of the inputs and handed to
    ``venn3`` together with ``set_labels`` and ``ax``. Nothing is returned.
    """
    # pairwise overlaps, reused for the two- and three-way regions
    a_and_b = set_a.intersection(set_b)
    a_and_c = set_a.intersection(set_c)
    b_and_c = set_b.intersection(set_c)

    # order of values for Venn diagram: (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    region_sizes = (
        len(set_a.difference(set_b.union(set_c))),  # Abc: only in A
        len(set_b.difference(set_a.union(set_c))),  # aBc: only in B
        len(a_and_b.difference(set_c)),             # ABc: in A and B, not C
        len(set_c.difference(set_a.union(set_b))),  # abC: only in C
        len(a_and_c.difference(set_b)),             # AbC: in A and C, not B
        len(b_and_c.difference(set_a)),             # aBC: in B and C, not A
        len(a_and_b.intersection(set_c)),           # ABC: in all three
    )
    venn3(subsets=region_sizes, set_labels=set_labels, ax=ax)

# E. coli inhibition entries, reduced to one row per (EC number, metabolite)
ki = S.read_cache('ki')
ki = ki.loc[ki['Organism'] == 'Escherichia coli']
ki_unique = ki.groupby(['EC_number', 'bigg.metabolite']).first().reset_index()

# Same reduction for the E. coli activation entries
act = S.read_cache('activating')
act = act.loc[act['Organism'] == 'Escherichia coli']
act_unique = act.groupby(['EC_number', 'bigg.metabolite']).first().reset_index()

# Stack the enzyme-metabolite pairs of both tables
interactions = pd.concat([
    ki_unique[['EC_number', 'bigg.metabolite']],
    act_unique[['EC_number', 'bigg.metabolite']],
])

# Count the pairs per EC number, largest count first, and write the table
# to the result directory
int_count_EC = (
    interactions.groupby('EC_number').count()
    .sort_values('bigg.metabolite', ascending=False)
    .rename(columns={'bigg.metabolite': 'count(metabolites)'})
)
int_count_EC.to_csv(os.path.join(S.RESULT_DIR, 'count_interactions_per_EC_number.csv'))