def test_integrated_model_works(ec_model_core, thermodb, mnx, compartment_data):
    """Check the thermo coverage of the adapted enzyme-constrained core model.

    The thermo database and compartment data are loaded from the given
    arguments, the model is passed through ``translate_model_mnx_to_seed``,
    and the model returned by ``adapt_gecko_to_thermo`` must report a
    thermo coverage of exactly 74.
    """
    thermo_database = load_thermoDB(thermodb)
    compartments = pytfa.io.read_compartment_data(compartment_data)

    translate_model_mnx_to_seed(ec_model_core, thermo_database, mnx)
    thermo_model = adapt_gecko_to_thermo(
        ec_model_core, thermo_database, compartments
    )

    assert get_thermo_coverage(thermo_model) == 74
def __init__(self, model_code='ecoli:iJO1366', solver='gurobi', min_biomass=0.55):
    """Wrap the base model in a ``ThermoModel`` and solve it once.

    After the parent initializer runs, ``self.model`` is replaced by a
    ``ThermoModel`` built from the thermo database under ``data_dir``.
    Compartment data and annotation data are applied, the model is
    prepared, converted, repaired and optimized. If the optimization
    raises ``AttributeError`` or ``SolverError``, ``self.model`` is replaced
    by the first element returned by ``relax_dgo``.

    :param model_code: forwarded unchanged to the parent initializer.
    :param solver: forwarded unchanged to the parent initializer.
    :param min_biomass: forwarded unchanged to the parent initializer.
    """
    super().__init__(model_code, solver, min_biomass)
    # NOTE(review): the reviewed text had lost its indentation, so how far
    # this ``if`` body extends is reconstructed, not known. Everything below
    # depends on ``thermo_db`` / the wrapped model, so it is nested here;
    # confirm against the original file.
    if self.species == 'ecoli':
        # self.model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
        # self.model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

        thermo_db = load_thermoDB(
            join(data_dir, 'thermo/thermo_data.thermodb'))
        # Replace the model set up by the parent with its thermo wrapper.
        self.model = ThermoModel(thermo_db, self.model)
        self.model.name = self.model_name
        self.model.sloppy = True

        apply_compartment_data(
            self.model,
            read_compartment_data(
                join(data_dir, 'thermo/compartment_data.json')))
        self.apply_annotation_data()

        self.model.prepare()
        self.model.convert()

        # self.model.reactions.get_by_id(self.objective).lower_bound = objective_lb
        self.model.repair()

        try:
            self.model.optimize()
        except (AttributeError, SolverError):
            # Solve failed: keep the model returned by relax_dgo (first item
            # of its 3-tuple); the other two return values are discarded.
            self.model, _, _ = relax_dgo(self.model, in_place=True)
            # self.model.reactions.get_by_id(self.objective).lower_bound = 0

        self.model.print_info()
def convert_cobra_to_tfa(cobra_model):
    """
    Wrap a cobra model in a ``pytfa.ThermoModel``, convert it and solve it once.

    The thermo database is read from ``data/thermo_data.thermodb`` one level
    above ``this_directory``. The model is prepared, converted with
    ``add_displacement=True``, assigned the solver named by ``GLPK`` with a
    feasibility tolerance of 1e-9, and optimized. The converted model is
    returned; the solution object itself is not.
    """
    thermo_db_path = join(this_directory, '..', 'data/thermo_data.thermodb')
    tmodel = pytfa.ThermoModel(load_thermoDB(thermo_db_path), cobra_model)

    tmodel.prepare()
    tmodel.convert(add_displacement=True)

    # Pick the solver first, then set its tolerance. Only feasibility is set:
    # the optimality and integrality options are deprecated for GLPK.
    tmodel.solver = GLPK
    tmodel.solver.configuration.tolerances.feasibility = 1e-9

    # Solve once; the returned solution is not used by this function.
    tmodel.optimize()

    return tmodel
def test_thermo_with_protein_constrain(ec_model_core, thermodb, compartment_data, mnx):
    """Check that adding a protein concentration changes the thermo optimum."""
    thermo_database = load_thermoDB(thermodb)
    compartments = pytfa.io.read_compartment_data(compartment_data)
    translate_model_mnx_to_seed(ec_model_core, thermo_database, mnx)

    # Baseline: thermo model built before any protein concentration is added.
    baseline_model = adapt_gecko_to_thermo(
        ec_model_core, thermo_database, compartments
    )
    baseline_objective = baseline_model.slim_optimize()

    # Add a concentration to one protein of the source model and rebuild.
    ec_model_core.proteins.prot_P25516.add_concentration(2e-5)
    constrained_model = adapt_gecko_to_thermo(
        ec_model_core, thermo_database, compartments
    )
    constrained_objective = constrained_model.slim_optimize()

    assert pytest.approx(baseline_objective) != constrained_objective
def test_thermo_constrain_solution(ec_model_core, thermodb, compartment_data, mnx):
    """Check that the thermo model's summed fluxes differ from the plain model's."""
    # Reference value: flux total of the first item returned by optimize().
    plain_flux_total = ec_model_core.optimize()[0].fluxes.sum()

    thermo_database = load_thermoDB(thermodb)
    compartments = pytfa.io.read_compartment_data(compartment_data)
    translate_model_mnx_to_seed(ec_model_core, thermo_database, mnx)

    thermo_model = adapt_gecko_to_thermo(
        ec_model_core, thermo_database, compartments
    )
    thermo_flux_total = thermo_model.optimize().fluxes.sum()

    assert pytest.approx(thermo_flux_total) != plain_flux_total
def test_write_thermodb(thermodb):
    """Check that a modified thermodb survives a write/load round trip.

    A ``"protein"`` metabolite entry is added to the loaded database, the
    database is written to ``thermodb.thermodb`` in the current working
    directory and read back, and the reloaded entry is inspected.

    The output file is removed in a ``finally`` clause so that a failing
    write or reload does not leave the artifact behind.
    """
    out_path = "thermodb.thermodb"

    thermodb = load_thermoDB(thermodb)
    thermodb["metabolites"]["protein"] = {
        "pKa": [7],
        "deltaGf_err": 0,
        "mass_std": 333.0,
        # NOTE(review): "struct_cures" looks like a misspelling of the
        # "struct_cues" key set below; kept as-is because it is test data.
        "struct_cures": {},
        "id": "protein",
        "nH_std": 12,
        "name": "protein",
        "formula": "",
        "deltaGf_std": 0,
        "error": "Nil",
        "charge_std": 0,
        "struct_cues": {},
    }

    try:
        write_thermodb(thermodb, out_path)
        changed_tdb = load_thermoDB(out_path)
    finally:
        # Clean the artifact on every path. The existence check keeps a
        # failed write from being masked by FileNotFoundError.
        if os.path.exists(out_path):
            os.remove(out_path)

    assert changed_tdb["metabolites"]["protein"]["deltaGf_std"] == 0
def test_relax_thermo_dgr_and_proteins_works(
    ec_model_core,
    experimental_copy_number,
    thermodb,
    compartment_data,
    slim_solution_core,
):
    """Check that dropping the relaxed protein constraints lifts the objective.

    The proteomics-constrained thermo model is expected to give an objective
    below 0.1 (or NaN). After ``relax_thermo_proteins`` reports which
    variables to relax, a freshly built model with those protein
    concentrations cleared must give an objective above 0.1.
    """
    proteomics = pd.read_csv(experimental_copy_number)

    def build_proteomics_model():
        # Always start from a fresh copy so earlier edits do not carry over.
        return from_copy_number(
            ec_model_core.copy(),
            index=proteomics["uniprot"],
            cell_copies=proteomics["copies_per_cell"],
            stdev=proteomics["stdev"],
            vol=2.3,
            dens=1.105e-12,
            water=0.3,
        )

    ec_model = build_proteomics_model()
    thermo_database = load_thermoDB(thermodb)
    compartments = pytfa.io.read_compartment_data(compartment_data)
    tmodel = adapt_gecko_to_thermo(ec_model, thermo_database, compartments)
    sol_pre_relax = tmodel.slim_optimize()

    tmodel.reactions.BIOMASS_Ecoli_core_w_GAM.lower_bound = slim_solution_core

    # Only proteins that carry a usable (non-None, non-NaN) concentration.
    measured_protein_ids = [
        protein.id
        for protein in tmodel.proteins
        if not (protein.concentration is None or isnan(protein.concentration))
    ]
    iis_sum_obj, status = relax_thermo_proteins(
        tmodel,
        measured_protein_ids,
        Objective_rule.MIN_ELASTIC_SUM_OBJECTIVE,
    )
    assert status == "optimal"

    # The relaxation is an in-place operation, so the model is regenerated
    # from scratch before the relaxed constraints are removed.
    ec_model = build_proteomics_model()
    tmodel = adapt_gecko_to_thermo(ec_model, thermo_database, compartments)
    for relaxed_id in iis_sum_obj:
        if relaxed_id not in tmodel.proteins:
            continue
        tmodel.proteins.get_by_id(relaxed_id).concentration = None
    sol_post_relax = tmodel.slim_optimize()

    assert isnan(sol_pre_relax) or sol_pre_relax < 0.1
    assert sol_post_relax > 0.1
def get_thermo_data():
    """Load the thermo database, the curated iJO1366 lexicon and compartment data.

    The lexicon index is rewritten before it is returned: every ``-`` becomes
    ``__`` and an identifier whose first character is a digit gets a leading
    underscore.

    Returns a ``(thermo_data, lexicon, compartment_data)`` tuple.
    """
    def _curated_id(raw_id):
        # Replace dashes first, then guard identifiers that start with a digit.
        renamed = str.replace(raw_id, '-', '__')
        if renamed[0].isdigit():
            return '_' + renamed
        return renamed

    thermo_data = load_thermoDB(
        pjoin(file_dir, '../../../pytfa/data/thermo_data.thermodb'))
    lexicon = read_lexicon('thermo_data/iJO1366_lexicon.csv')
    compartment_data = read_compartment_data(
        'thermo_data/iJO1366_compartment_data.json')

    # Curate in place: swap the index for its cleaned-up counterpart.
    lexicon.index = pd.Series(lexicon.index).apply(_curated_id)

    return thermo_data, lexicon, compartment_data
def test_relax_concentrations_and_proteins_works(
    ec_model_core,
    experimental_copy_number,
    thermodb,
    compartment_data,
    slim_solution_core,
):
    """Check that relaxing concentrations and proteins reaches the growth target.

    With the ``LC_atp_c`` bounds overridden and biomass forced to 40 % of
    ``slim_solution_core``, ``relax_thermo_concentrations_proteins`` must end
    with status ``"optimal"``, relax either the ATP or the ADP slack, and
    leave a model whose biomass objective equals the forced target.
    """
    proteomics = pd.read_csv(experimental_copy_number)
    ec_model = from_copy_number(
        ec_model_core.copy(),
        index=proteomics["uniprot"],
        cell_copies=proteomics["copies_per_cell"],
        stdev=proteomics["stdev"],
        vol=2.3,
        dens=1.105e-12,
        water=0.3,
    )
    thermo_database = load_thermoDB(thermodb)
    compartments = pytfa.io.read_compartment_data(compartment_data)
    tmodel = adapt_gecko_to_thermo(ec_model, thermo_database, compartments)
    sol_pre_relax = tmodel.slim_optimize()

    # Override the ATP log-concentration bounds, then force the growth target.
    tmodel.variables.LC_atp_c.ub = 2e2
    tmodel.variables.LC_atp_c.lb = 1e2
    target_growth = slim_solution_core * 0.4
    tmodel.reactions.BIOMASS_Ecoli_core_w_GAM.lower_bound = target_growth

    measured_protein_ids = [
        protein.id
        for protein in tmodel.proteins
        if not (protein.concentration is None or isnan(protein.concentration))
    ]
    # The relaxation is an in-place operation on tmodel.
    iis_sum_obj, status = relax_thermo_concentrations_proteins(
        tmodel,
        measured_protein_ids,
        Objective_rule.MIN_ELASTIC_SUM_OBJECTIVE,
    )

    # Point the objective at the biomass reaction before re-solving.
    tmodel.objective = tmodel.reactions.BIOMASS_Ecoli_core_w_GAM
    sol_post_relax = tmodel.slim_optimize()

    # in the MILP relaxation atp is relaxed, whereas in the SUM LP OBJ relaxation
    # adp, gln and pi gets relaxed (in the opposite direction) compensating atp
    assert any(
        slack in iis_sum_obj
        for slack in ("NegSlackLC_atp_c", "PosSlackLC_adp_c")
    )
    assert status == "optimal"
    assert isnan(sol_pre_relax) or sol_pre_relax < 0.1
    assert pytest.approx(sol_post_relax) == target_growth
def load_data(model_name):
    """
    Read the pre-curated thermodynamic inputs that belong to one model.

    Parameters
    ----------
    model_name : str
        Name of the model; passed to ``static_path`` to locate the
        model-specific ``lexicon.csv`` and ``compartment_data.json``.

    Returns
    -------
    thermo_data : dict
        The thermodynamic database read from ``thermo_data.thermodb``.
    lexicon : pandas.DataFrame
        Table linking metabolite IDs to SEED compound IDs.
    compartment_data : dict
        Information about each compartment of the model.
    """
    thermo_db_path = static_path("thermo_data.thermodb")
    thermo_data = load_thermoDB(thermo_db_path)

    lexicon_path = static_path(model_name, "lexicon.csv")
    lexicon = read_lexicon(lexicon_path)

    compartment_path = static_path(model_name, "compartment_data.json")
    compartment_data = read_compartment_data(compartment_path)

    return thermo_data, lexicon, compartment_data
#sol = pd.DataFrame.from_dict(data=res, orient='index', columns=['minimum','maximum']) #sol.to_csv('sol.csv',encoding='utf-8') return pd.DataFrame.from_dict(data=res, orient='index', columns=['minimum', 'maximum']) # In[32]: cobra_model = import_matlab_model( 'C:/Users/farza/Documents/Master_3/Systemes biology/small_ecoli.mat') fba_solution = cobra_model.optimize() thermo_data = load_thermoDB( 'C:/Users/farza/Documents/Master_3/Systemes biology/thermo_data.thermodb') tfa_model = pytfa.ThermoModel(thermo_data, cobra_model) tfa_model.solver = 'optlang-glpk' tfa_model.prepare() tfa_model.convert() ## Info on the model tfa_model.print_info() ## Optimality tfa_solution = tfa_model.optimize() # integrating metabolomics data (comment out this part if not integrating) df = pd.read_csv('metabolomics_data.csv', sep=';')
import pickle

# Solver identifiers; `solver` selects one of them.
CPLEX = 'optlang-cplex'
GUROBI = 'optlang-gurobi'
GLPK = 'optlang-glpk'
solver = GUROBI

case = 'full'  # 'reduced' or 'full'

# Hard-coded locations of the thermo database, the test root and the models.
thermo_database = '/projectnb2/bioinfor/SEGRE/goldford/CoenzymeSpecificity/pytfa/data/thermo_data.thermodb'
root_dir = '/projectnb/bioinfor/SEGRE/goldford/CoenzymeSpecificity/pytfa/tests/singleCoenzymeModel.08272021'
model_path = '/projectnb2/bioinfor/SEGRE/goldford/CoenzymeSpecificity/pytfa/models'

# Identifier of the biomass reaction (the alternative spelling is kept below).
# biomass_rxn = 'BIOMASS_Ec_iJO1366_WT_53p95M'
biomass_rxn = 'Ec_biomass_iJO1366_WT_53p95M'

# Load reaction DB
print("Loading thermo data...")
thermo_data = load_thermoDB(thermo_database)
print("Done !")

# Pre-compiled data is imported because it is faster for bigger models.
cobra_model = load_json_model(model_path + '/iJO1366_NAD_ratio_1.fromTFA.json')
lexicon = read_lexicon(model_path + '/iJO1366/lexicon.csv')
compartment_data = read_compartment_data(
    model_path + '/iJO1366/compartment_data.json')

# Initialize the thermo model from the cobra model.
mytfa = pytfa.ThermoModel(thermo_data, cobra_model)
return model m = cobra_model.copy() rxn_ids = [x.id for x in cobra_model.reactions] m = cobra_model.copy() rxn_ids = [x.id for x in cobra_model.reactions] for rxnid in rxn_ids: m = single_coenzyme_transform(m, rxnid) m.remove_reactions([x for x in m.reactions if x.id == 'NADTRHD[condensed]'][0]) m.objective = 'Ec_biomass_iJO1366_WT_53p95M[condensed]' # Load reaction DB thermo_data = load_thermoDB(root_dir + 'data/thermo_data.thermodb') lexicon = read_lexicon(root_dir + 'models/small_ecoli/lexicon.csv') compartment_data = read_compartment_data( root_dir + 'models/small_ecoli/compartment_data.json') def convert2thermo(model, name): # Initialize the model tmodel = pytfa.ThermoModel(thermo_data, model) tmodel.name = name # Annotate the model annotate_from_lexicon(tmodel, lexicon) apply_compartment_data(tmodel, compartment_data) ## TFA conversion
from skimpy.io.generate_from_pytfa import FromPyTFA
from skimpy.utils.general import sanitize_cobra_vars
from skimpy.utils.tabdict import TabDict
from skimpy.analysis.oracle import *

# Solver identifier assigned to the thermo model below.
GLPK = 'optlang-glpk'

# ---------------------------------------------------------------------------
# Import and curate a model
# ---------------------------------------------------------------------------
this_cobra_model = import_matlab_model('../../models/toy_model.mat', 'model')

# ---------------------------------------------------------------------------
# Make tfa model
# ---------------------------------------------------------------------------
# Convert to a thermodynamics model
thermo_data = load_thermoDB('../../data/thermo_data.thermodb')
this_pytfa_model = pytfa.ThermoModel(thermo_data, this_cobra_model)
this_pytfa_model.solver = GLPK

# TFA conversion
this_pytfa_model.prepare()
this_pytfa_model.convert(add_displacement=True)

# ---------------------------------------------------------------------------
# Generate a draft Kinetic Model
# ---------------------------------------------------------------------------
# Generate the KineticModel
# Define the molecules that should be considered small-molecules
model.objective = oldobj model.objective_direction = olddir model.remove_cons_vars(maxcons) return pd.DataFrame.from_dict(data=res, orient='index', columns=['minimum','maximum']) cobra_model = import_matlab_model('C:/users/vicci/Desktop/vcpre/small_ecoli.mat') fba_solution = cobra_model.optimize() thermo_data = load_thermoDB('C:/users/vicci/Desktop/vcpre/thermo_data.thermodb') tfa_model = pytfa.ThermoModel(thermo_data, cobra_model) tfa_model.solver = 'optlang-glpk' tfa_model.prepare() tfa_model.convert() ## Info on the model tfa_model.print_info() ## Optimality tfa_solution = tfa_model.optimize()
def __init__(self, model_code='ecoli:iJO1366', solver='gurobi', min_biomass=0.55):
    """Build the ETFL model on top of the base model and solve it once.

    Visible steps, in order:

    1. Run the parent initializer; for species ``'ecoli'`` add a pseudo
       reaction converting ``cys__L_c`` into a new ``selcys__L_c``
       metabolite, then sanitize variable names.
    2. Collect the coupling dictionary, monomer dictionaries and essentials.
    3. Replace ``self.model`` by a ``ThermoMEModel``, apply compartment data
       and lexicon annotations, then ``prepare()`` and ``convert()`` it.
    4. Read the nucleotide sequences CSV, build the mRNA dictionary, and add
       sequences, essentials, mRNAs, ribosome, RNA polymerase, expression
       and enzymatic coupling to the model.
    5. ``repair()`` and ``optimize()``; on ``AttributeError`` or
       ``SolverError`` replace ``self.model`` by the first element returned
       by ``relax_dgo``.

    Wall-clock durations of the build, the failed solve and the relaxation
    are printed.

    NOTE(review): ``mu_range`` and ``n_mu_bins`` are not defined in this
    method; they must come from an enclosing scope - confirm.

    :param model_code: forwarded unchanged to the parent initializer.
    :param solver: forwarded unchanged to the parent initializer.
    :param min_biomass: forwarded unchanged to the parent initializer.
    """
    start_time = time.time()
    super().__init__(model_code, solver, min_biomass)
    # NOTE(review): the reviewed text had lost its indentation; the extent of
    # this ``if`` body (selenocysteine pseudo-reaction only) is reconstructed.
    if self.species == 'ecoli':
        # Add cystein -> selenocystein transformation for convenience
        selcys = Metabolite(id='selcys__L_c',
                            compartment='c',
                            formula='C3H7NO2Se')
        selcys_rxn = Reaction(id='PSEUDO_selenocystein_synthase',
                              name='PSEUDO Selenocystein_Synthase')
        selcys_rxn.add_metabolites({
            self.model.metabolites.cys__L_c: -1,
            selcys: +1
        })
        self.model.add_reactions([selcys_rxn])

    self._sanitize_varnames()
    # self.model.reactions.EX_glc__D_e.lower_bound = -1 * glc_uptake - glc_uptake_std
    # self.model.reactions.EX_glc__D_e.upper_bound = -1 * glc_uptake + glc_uptake_std

    # time_str = get_timestr()

    # Inputs for the expression / coupling steps further down.
    coupling_dict = get_coupling_dict(self.model,
                                      mode='kmax',
                                      atps_name='ATPS4rpp',
                                      infer_missing_enz=True)
    # dna_nucleotides is only referenced by the commented-out code below.
    aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict(
    )
    essentials = get_essentials()

    # if has_thermo:
    thermo_db = load_thermoDB(
        join(data_dir, 'thermo/thermo_data.thermodb'))
    # Replace the model set up by the parent with its ThermoMEModel wrapper.
    self.model = ThermoMEModel(thermo_db,
                               model=self.model,
                               growth_reaction=self.biomass_reaction,
                               mu_range=mu_range,
                               n_mu_bins=n_mu_bins)
    self.model.name = self.model_name

    # annotate_from_lexicon(self.model, read_lexicon(dir_path + '/data/thermo/lexicon.csv'))
    # compartment_data = read_compartment_data(dir_path + '/data/thermo/compartment_data.json')
    # apply_compartment_data(self.model, compartment_data)
    apply_compartment_data(
        self.model,
        read_compartment_data(
            join(data_dir, 'thermo/compartment_data.json')))
    annotate_from_lexicon(
        self.model, read_lexicon(join(data_dir, 'thermo/lexicon.csv')))
    self.model.prepare()

    # self.model.reactions.MECDPS.thermo['computed'] = False
    # self.model.reactions.NDPK4.thermo['computed'] = False
    # self.model.reactions.TMDPP.thermo['computed'] = False
    # self.model.reactions.ARGAGMt7pp.thermo['computed'] = False
    self.model.convert()
    # else:
    #     self.model = MEModel(model=self.model, growth_reaction=growth_reaction_id, mu_range=mu_range,
    #                          n_mu_bins=n_mu_bins, name=name)

    # mrna_dict = get_mrna_dict(self.model)
    # nt_sequences = get_nt_sequences()
    # Species/model-specific sequences: first column of a header-less CSV
    # whose column 0 is used as the index.
    nt_sequences = pd.read_csv(join(
        data_dir, f'{self.species}/{self.model_name}_nt_seq_kegg.csv'),
                               index_col=0,
                               header=None).iloc[:, 0]
    mrna_dict = self.get_mrna_dict(nt_sequences)
    rnap = get_rnap()
    rib = get_rib()

    # Remove nucleotides and amino acids from biomass reaction as they will be
    # taken into account by the expression
    remove_from_biomass_equation(model=self.model,
                                 nt_dict=rna_nucleotides,
                                 aa_dict=aa_dict,
                                 essentials_dict=essentials)
    self.model.add_nucleotide_sequences(nt_sequences)
    self.model.add_essentials(essentials=essentials,
                              aa_dict=aa_dict,
                              rna_nucleotides=rna_nucleotides,
                              rna_nucleotides_mp=rna_nucleotides_mp)
    self.model.add_mrnas(mrna_dict.values())
    self.model.add_ribosome(rib, free_ratio=0.2)
    # http://bionumbers.hms.harvard.edu/bionumber.aspx?id=102348&ver=1&trm=rna%20polymerase%20half%20life&org=
    # Name          Fraction of active RNA Polymerase
    # Bionumber ID  102348
    # Value         0.17-0.3 unitless
    # Source        Bremer, H., Dennis, P. P. (1996) Modulation of chemical composition and other parameters of the cell by growth rate.
    #               Neidhardt, et al. eds. Escherichia coli and Salmonella typhimurium: Cellular
    #               and Molecular Biology, 2nd ed. chapter 97 Table 1
    self.model.add_rnap(rnap, free_ratio=0.75)

    self.model.build_expression()
    self.model.add_enzymatic_coupling(coupling_dict)

    # if has_neidhardt:
    #     nt_ratios, aa_ratios = get_ratios()
    #     chromosome_len, gc_ratio = get_ecoli_gen_stats()
    #     kdeg_mrna, mrna_length_avg = get_mrna_metrics()
    #     kdeg_enz, peptide_length_avg = get_enz_metrics()
    #     neidhardt_mu, neidhardt_rrel, neidhardt_prel, neidhardt_drel = get_neidhardt_data()
    #
    #     add_interpolation_variables(self.model)
    #     self.model.add_dummies(nt_ratios=nt_ratios, mrna_kdeg=kdeg_mrna, mrna_length=mrna_length_avg,
    #                            aa_ratios=aa_ratios, enzyme_kdeg=kdeg_enz, peptide_length=peptide_length_avg)
    #     add_protein_mass_requirement(self.model, neidhardt_mu, neidhardt_prel)
    #     add_rna_mass_requirement(self.model, neidhardt_mu, neidhardt_rrel)
    #     add_dna_mass_requirement(self.model, mu_values=neidhardt_mu, dna_rel=neidhardt_drel, gc_ratio=gc_ratio,
    #                              chromosome_len=chromosome_len, dna_dict=dna_nucleotides)

    # Need to put after, because dummy has to be taken into account if used.
    self.model.populate_expression()
    self.model.add_trna_mass_balances()

    # self.model.growth_reaction.lower_bound = objective_lb
    self.model.repair()
    print(
        f"Building ETFL model costs {time.time() - start_time:.2f} seconds!"
    )

    try:
        # Restart the clock so the except branch can report the solve time.
        start_time = time.time()
        self.model.optimize()
    except (AttributeError, SolverError):
        print(
            f"Solving no relaxed model costs {time.time() - start_time:.2f} seconds!"
        )
        start_time = time.time()
        # Keep the model returned by relax_dgo (first item of its 3-tuple);
        # the other two return values are discarded.
        self.model, _, _ = relax_dgo(self.model, in_place=True)
        print(
            f"Relaxing model costs {time.time() - start_time:.2f} seconds!"
        )
        # self.model.growth_reaction.lower_bound = 0

    # print(f"Build ETFL model for {time.time() - start_time:.2f} seconds!")
    self.model.print_info()