示例#1
0
def main(family):
    """Sample model reactomes and used environments for one family and pickle them.

    Loads the family's irreducible-set data, draws 1000 evenly spaced model
    samples, samples the used environment for each of the 1000 stored MFS
    sets, and dumps the results to '<data>/pickles/<family>.pkl'.
    """
    # project data directory, two levels above the working directory
    data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data')

    # all data from the irreducible set for this family
    fam_mfs = MFS_family(family,
                         data_folder + '/reactomes/all_families/',
                         data_folder + '/models/all_models')

    # ----- reaction frequency, restricted to analysed reactions -----
    full_freq_m = fam_mfs.freq_m.copy()
    full_freq_m = full_freq_m.T[fam_mfs.include_reactome].T
    av_freq_m = np.mean(full_freq_m, axis=0)

    # ----- model reaction frequency over 1000 evenly spaced samples -----
    model_sample = np.zeros((1000, len(av_freq_m)))
    for i in range(1000):
        print(i)
        picked = get_even_distances(fam_mfs.model_reactomes)
        sample_freq = np.sum(fam_mfs.model_reactomes[picked], axis=0) / len(picked)
        model_sample[i] = sample_freq[fam_mfs.include_reactome]

    # ----- environment ball -----
    ev = Env_ball(1000)

    # external metabolites; water and oxygen are excluded
    transporter = ev.transporters[:]
    transporter.remove('EX_cpd00001_e')
    transporter.remove('EX_cpd00007_e')
    transporter = np.array(transporter)

    mc = fam_mfs.model.copy()

    # 290 columns — presumably the number of retained transporters; TODO confirm
    used_environment = np.zeros((1000, 290))
    for i in range(1000):
        gc.collect()
        flux_sets = fam_mfs.mfs[str(i)][fam_mfs.include_reactome].T
        used_environment[i] = get_environment_sample(
            mc, ev.matrix[i], ev.transporters,
            fam_mfs.reactome[fam_mfs.include_reactome], flux_sets,
            transporter, 1000)
        print(i)

    store = {'used_env': used_environment.copy(),
             'model_sample': model_sample.copy(),
             'full_freq_m': full_freq_m.copy(),
             'reactome': fam_mfs.reactome[fam_mfs.include_reactome],
             'transporter': transporter.copy()}
    pickle.dump(store, open(data_folder + '/pickles/' + family + '.pkl', 'wb'))
示例#2
0
 def add_transporter(self, model):
     """Add one exchange reaction per Env_ball transporter to *model*.

     Each reaction exports a single external metabolite; the metabolite is
     reused if the model already contains it, otherwise a new one is created
     in compartment 'e'. The model is repaired and optimized afterwards.
     """
     ev = Env_ball(1000)

     for rxn_id in ev.transporters:
         met_id = rxn_id.replace('EX_', '')
         met_name = ev.metabolites_d[met_id]

         exchange = Reaction(rxn_id)
         exchange.name = 'export of ' + met_name
         exchange.lower_bound = -1000.  # default exchange bounds
         exchange.upper_bound = 1000.

         if model.metabolites.has_id(met_id):
             metabolite = model.metabolites.get_by_id(met_id)
         else:
             metabolite = Metabolite(met_id, name=met_name, compartment='e')
         exchange.add_metabolites({metabolite: -1.0})
         model.add_reactions([exchange])

     model.repair()
     model.optimize()
示例#3
0
        # Mark each reaction for inclusion when it is gene-supported AND can
        # either carry flux (FVA) or reach the biomass reaction 'bio1' in g.
        for i, name in enumerate(self.reactome):
            if self.gene_counts[i] > 0:  #condition 1)

                #condition 3)
                if fva.loc[name]['minimum'] < 0:
                    index[i] = 1
                elif fva.loc[name]['maximum'] > 0:
                    index[i] = 1

                #condition 2)
                if self.model.reactions.get_by_id(name).reversibility:
                    # reversible reactions are split into forward/reverse nodes
                    if nx.has_path(g, source=name + '_f', target='bio1'):
                        index[i] = 1
                    elif nx.has_path(g, source=name + '_r', target='bio1'):
                        index[i] = 1
                else:
                    if nx.has_path(g, source=name, target='bio1'):
                        index[i] = 1

        # np.bool was deprecated in NumPy 1.20 and removed in 1.24; the builtin
        # bool is the documented replacement and behaves identically here.
        self.include_reactome = index.astype(bool)


# NOTE(review): 'ev' is not referenced in the visible main() below — possibly a
# leftover from the commented-out exploration code; confirm before removing.
ev = Env_ball(1000)

#families = os.listdir(pathToFamilyEFMs)

#fam_mfs={}

#for i in [families[0]]:
#    fam_panEFM[i]=panEFM_family(i, pathToFamilyEFMs,pathToModels)
def main(family):
    """Build and pickle the secondary data structures for one family.

    Loads the family's '.panEFM.pkl' and '.associate.pkl' pickles, derives the
    primary structures (reactome, metabolome, environment ball, FIRS, niches,
    models), computes the secondary statistics labelled by the inline comments
    (sizes, fluidities, frequencies, residuals, correlations, distances,
    dn/ds), and dumps everything to '<family>.secondaryDS.pkl'.
    """
    #reference from the git script
    data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data')

    #load pickles
    # NOTE(review): pickle.load is only safe on trusted local artifacts.
    fam_panEFM = pickle.load(
        open(data_folder + '/pickles/' + family + '.panEFM.pkl', 'rb'))

    fam_associate = pickle.load(
        open(data_folder + '/pickles/' + family + '.associate.pkl', 'rb'))

    #obtain all the primary data structures (see: Measures & data structures)

    #Reactome:[r]

    reactome = fam_associate['reactome'].copy()

    # Metabolome: [M]

    metabolome = fam_associate['transporter'].copy()

    # Environment Ball: [N1][M]

    environment_ball_class = Env_ball(1000)

    #exclude oxygen and water
    # (metabolome came from the associate pickle with those already removed)
    met_idx = np.array(
        [environment_ball_class.transporters.index(i) for i in metabolome])
    environment_ball = environment_ball_class.matrix.T[met_idx].T

    #FIRS: [E][N1][r]

    #get only the included reactions

    firs = {
        i: fam_panEFM.panEFM[i][fam_panEFM.include_reactome].T
        for i in fam_panEFM.panEFM
    }

    # Niche: [E][N1][M]

    niche = fam_associate['used_env'].copy()

    #shuffle sample orders
    # the same permutation r is applied to firs[str(i)] and niche[i], so the
    # FIRS/niche pairing within each environment is preserved
    for i in range(1000):
        r = np.arange(1000)
        np.random.shuffle(r)
        firs[str(i)] = firs[str(i)][r]
        niche[i] = niche[i][r]

    # Niche binary: [E][N1][M]
    # 1.0 wherever the flux is non-zero after rounding to 10 decimals
    niche_binary = {}

    for i in niche:
        niche_binary[i] = niche[i].copy()
        niche_binary[i][np.round(niche_binary[i], 10) != 0] = 1.0

    #Models: [s]

    models = np.array([i.replace('.sbml', '') for i in fam_panEFM.model_files])

    # Model reactomes: [s][r]
    model_reactomes = fam_panEFM.model_reactomes.copy()
    model_reactomes = model_reactomes.T[fam_panEFM.include_reactome].T

    #Model sample: [d][r]

    model_sample_idx = np.array(
        get_even_distances(model_reactomes, metric='hamming'))
    model_sample = model_reactomes[model_sample_idx]

    # FIRS growth rate: [E][N1]
    # total used-environment flux per sample (used here as a growth proxy —
    # TODO confirm this interpretation)

    firs_growth_rate = np.zeros((1000, 1000))

    for i in range(1000):
        firs_growth_rate[i] = np.sum(niche[i], axis=1)

    #remove CO2 and H+

    met_idx = (metabolome != 'EX_cpd00011_e') & (metabolome != 'EX_cpd00067_e')
    metabolome = metabolome[met_idx]
    environment_ball = environment_ball.T[met_idx].T

    for i in niche:
        niche[i] = niche[i].T[met_idx].T

    for i in niche_binary:
        niche_binary[i] = niche_binary[i].T[met_idx].T

    ######Secondary Data Structures###

    #Size of FIRS: [E][N1]
    size_of_firs = np.zeros((1000, 1000))
    for i in range(1000):
        size_of_firs[i] = np.sum(firs[str(i)], axis=1)

    #Size of Niche: [E][N1]
    size_of_niches = np.zeros((1000, 1000))
    for i in range(1000):
        size_of_niches[i] = np.sum(niche_binary[i], axis=1)

    #Size of models: [s]
    size_of_models = np.zeros(len(models))
    for i, v in enumerate(model_reactomes):
        size_of_models[i] = sum(v > 0)

    #Fluidity of FIRS within environments: [E]

    fluidity_firs_within = np.zeros(1000)
    for i in range(1000):
        fluidity_firs_within[i] = get_fluidity_index(firs[str(i)], 1000)

    #Fluidity of FIRS across environments: [N2]
    # symmetric difference of two random FIRS divided by the size of their union

    fluidity_firs_across = np.zeros(10000)

    for i in range(10000):

        rintA = np.random.randint(0, 1000, size=2)
        rintB = np.random.randint(0, 1000, size=2)
        s1 = sum(
            np.clip(
                firs[str(rintA[0])][rintB[0]] - firs[str(rintA[1])][rintB[1]],
                0, 1))
        s2 = sum(
            np.clip(
                firs[str(rintA[1])][rintB[1]] - firs[str(rintA[0])][rintB[0]],
                0, 1))
        fluidity_firs_across[i] = (s1 + s2) / sum(
            (firs[str(rintA[0])][rintB[0]] +
             firs[str(rintA[1])][rintB[1]]) > 0)

    #Fluidity of niches: [E]
    fluidity_niche_within = np.zeros(1000)
    for i in range(1000):

        fluidity_niche_within[i] = get_fluidity_index(niche_binary[i], 1000)

    #Fluidity across niches: [N2]
    fluidity_niche_across = np.zeros(10000)

    for i in range(10000):

        rintA = np.random.randint(0, 1000, size=2)
        rintB = np.random.randint(0, 1000, size=2)
        s1 = sum(
            np.clip(
                niche_binary[rintA[0]][rintB[0]] -
                niche_binary[rintA[1]][rintB[1]], 0, 1))
        s2 = sum(
            np.clip(
                niche_binary[rintA[1]][rintB[1]] -
                niche_binary[rintA[0]][rintB[0]], 0, 1))
        fluidity_niche_across[i] = (s1 + s2) / sum(
            (niche_binary[rintA[0]][rintB[0]] +
             niche_binary[rintA[1]][rintB[1]]) > 0)

    #Fluidity of models: [N2]
    # each call presumably samples random model pairs internally — TODO confirm

    fluidity_models = np.zeros(10000)
    for i in range(10000):
        print(i)
        fluidity_models[i] = get_fluidity_index(model_reactomes, 2)

    #Fluidity of model samples: [N2]
    fluidity_model_samples = np.zeros(10000)
    for i in range(10000):
        print(i)
        fluidity_model_samples[i] = get_fluidity_index(model_sample, 2)

    #Frequency of reactions: [E][r]

    freq_reactions = np.zeros((1000, len(reactome)))
    for i in range(1000):
        freq_reactions[i] = np.sum(firs[str(i)], axis=0) / 1000

    #Residual reactions frequency: [E][r]
    freq_reactions_m = np.mean(freq_reactions, axis=0)

    residual_reaction_freq = freq_reactions - freq_reactions_m

    #niche driven score for reactions: [r]
    # std of the residuals across environments, rounded to 5 decimals

    niche_score_reactions = np.round(np.std(residual_reaction_freq, axis=0), 5)

    #Reaction frequency in models: [r]
    freq_mod_reactions = np.sum(model_reactomes, axis=0) / len(models)

    #Reaction frequency in model sample[r]
    freq_mod_samp_reactions = np.sum(model_sample, axis=0) / len(model_sample)

    #Metabolite usage frequency: [E][M]

    freq_metabolite_use = np.zeros((1000, len(metabolome)))
    for i in range(1000):
        freq_metabolite_use[i] = np.sum(niche_binary[i], axis=0) / 1000
    freq_metabolite_use_m = np.mean(freq_metabolite_use, axis=0)

    residual_metabolite_freq = freq_metabolite_use - freq_metabolite_use_m

    #metabolite usage flux [E][M]

    metabolite_usage_flux = np.zeros((1000, len(metabolome)))
    for i in range(1000):
        metabolite_usage_flux[i] = np.sum(niche[i], axis=0) / 1000

    #residual metabolite usage flux: [E][M]

    metabolite_usage_flux_m = np.mean(metabolite_usage_flux, axis=0)
    residual_metabolite_usage_flux = metabolite_usage_flux - metabolite_usage_flux_m

    #niche driven score for metabolites: [M]
    niche_score_metabolites = np.round(
        np.std(residual_metabolite_usage_flux, axis=0), 5)

    #####x, y: non zero reactions frequencies and metabolites usage flux####

    x_reactome = reactome[niche_score_reactions != 0]
    x_reac_freq = freq_reactions.T[niche_score_reactions != 0].T

    y_metabolome = metabolome[niche_score_metabolites != 0]
    y_met_usage_flux = metabolite_usage_flux.T[niche_score_metabolites != 0].T

    y_met_freq = freq_metabolite_use.T[niche_score_metabolites != 0].T

    #correlation: [r][M]
    # Pearson r of each retained reaction's frequency against each retained
    # metabolite's usage flux, across the 1000 environments

    correlation = np.zeros((len(x_reactome), len(y_metabolome)))

    for i, reac in enumerate(x_reac_freq.T):
        correlation[i] = np.array([
            sts.pearsonr(reac.flatten(), metab.flatten())[0]
            for metab in y_met_usage_flux.T
        ])

    #correlation metabolite frequency

    correlation_met_freq = np.zeros((len(x_reactome), len(y_metabolome)))

    for i, reac in enumerate(x_reac_freq.T):
        correlation_met_freq[i] = np.array([
            sts.pearsonr(reac.flatten(), metab.flatten())[0]
            for metab in y_met_freq.T
        ])

    #Reaction pairwise distance: [E][E]
    reaction_pairwise_distance = sps.distance.squareform(
        sps.distance.pdist(freq_reactions))

    #FIRS pairwise distance: [E]
    firs_pairwise_distance = np.zeros(1000)

    for i in range(1000):
        firs_pairwise_distance[i] = np.mean(
            sps.distance.pdist(firs[str(i)], metric='hamming'))

    #Niche binary pairwise distance: [E]
    niche_binary_pairwise_distance = np.zeros(1000)

    for i in range(1000):
        niche_binary_pairwise_distance[i] = np.mean(
            sps.distance.pdist(niche_binary[i], metric='hamming'))

    #Niche pairwise distance: [E]
    niche_pairwise_distance = np.zeros(1000)

    for i in range(1000):
        niche_pairwise_distance[i] = np.mean(sps.distance.pdist(niche[i]))

    #Niche distance: [E][E]
    niche_distance = sps.distance.squareform(
        sps.distance.pdist(metabolite_usage_flux))

    #DNDS_reaction: [N1]
    # ds: a FIRS set against a row-shuffled copy of itself (same environment);
    # dn: FIRS sets from two different random environments
    dn_reactions = np.zeros(1000)
    ds_reactions = np.zeros(1000)
    rand_idx = np.arange(1000)
    for i in range(1000):
        f1 = np.random.randint(0, 1000, size=2)
        np.random.shuffle(rand_idx)
        ds_reactions[i] = flip_p(firs[str(f1[0])], firs[str(f1[0])][rand_idx],
                                 1000)
        dn_reactions[i] = flip_p(firs[str(f1[0])], firs[str(f1[1])], 1000)

    store ={'size_of_firs': size_of_firs, 'size_of_niches': size_of_niches,\
            'size_of_models': size_of_models, 'fluidity_firs_within': fluidity_firs_within,\
            'fluidity_firs_across':fluidity_firs_across, 'fluidity_niche_within':fluidity_niche_within,\
            'fluidity_niche_across': fluidity_niche_across, 'fluidity_models': fluidity_models,\
            'fluidity_model_samples': fluidity_model_samples, 'freq_reactions': freq_reactions, \
            'residual_reaction_freq': residual_reaction_freq, 'niche_score_reactions': niche_score_reactions,\
            'freq_mod_reactions': freq_mod_reactions, 'freq_mod_samp_reactions': freq_mod_samp_reactions,\
            'freq_metabolite_use': freq_metabolite_use, 'metabolite_usage_flux': metabolite_usage_flux,\
            'metabolite_usage_flux_m': metabolite_usage_flux_m, 'niche_score_metabolites':niche_score_metabolites,\
            'x_reactome': x_reactome, 'x_reac_freq':x_reac_freq,\
            'y_metabolome' : y_metabolome, 'y_met_usage_flux':y_met_usage_flux,\
            'y_met_freq': y_met_freq, 'correlation': correlation, \
            'correlation_met_freq':correlation_met_freq, 'reaction_pairwise_distance':reaction_pairwise_distance,\
            'firs_pairwise_distance': firs_pairwise_distance, 'niche_binary_pairwise_distance': niche_binary_pairwise_distance,\
            'niche_pairwise_distance': niche_pairwise_distance, 'niche_distance': niche_distance,\
            'dn_reactions': dn_reactions, 'ds_reactions':ds_reactions}
    pickle.dump(
        store,
        open(data_folder + '/pickles/' + family + '.secondaryDS.pkl', 'wb'))
示例#5
0
            # NOTE(review): this chunk begins mid-function — the enclosing def
            # and the surrounding loop header are outside the visible source.
            add_reaction(orig_obj, model, reactions[samples[i]], up_low_b_dict)
    print(orig_obj, np.round(float(model.slim_optimize()), decimals=12))

    # reaction profile: 1 = reaction still open, 0 = both bounds are zero
    prof_r = np.ones(len(reactions))
    for i, v in enumerate(reactions):
        if (model.reactions.get_by_id(v).upper_bound
                == 0) and (model.reactions.get_by_id(v).lower_bound == 0):
            prof_r[i] = 0.0
    # transporter profile: 1 where flux < 0 (uptake by the usual exchange-flux
    # sign convention — TODO confirm), 0 otherwise
    prof_t = np.ones(len(transporters))
    for i, v in enumerate(transporters):
        if model.reactions.get_by_id(v).flux >= 0:
            prof_t[i] = 0.0
    return prof_r, prof_t


# ----- script setup: environment ball, model, and original reaction bounds -----
eb = Env_ball(1000)
transporters = eb.transporters[:]
random_environments = eb.matrix.copy()

# NOTE(review): hard-coded local path — works only on the author's machine
model = cobra.io.read_sbml_model(
    'C:/Users/danie/Documents/random_work_stuff/home/home/Files/Aeromonadaceae.ensembl.sbml'
)

model.solver = 'gurobi'
# internal (non-exchange) reactions carry 'rxn' in their id
reactions = [i.id for i in model.reactions if 'rxn' in i.id]

# remember each reaction's original (upper, lower) bounds so they can be restored
up_low_b_dict = {}
for reaction in reactions:
    rxn_obj = model.reactions.get_by_id(reaction)
    up_low_b_dict[reaction] = (rxn_obj.upper_bound, rxn_obj.lower_bound)