def essential_precursors_not_in_biomass(model, reaction):
    u"""
    Report universally essential precursors absent from the biomass equation.

    All known organisms build their biomass from a shared set of universal
    components: the proteinogenic amino acids, deoxy- and ribonucleotides,
    water and a range of metabolic cofactors. Every identifier listed in
    ``ESSENTIAL_PRECURSOR_IDS`` is looked up in the compartment that
    ``helpers.find_compartment_id_in_model`` resolves for ``'c'`` and then
    compared against the metabolites of the reactions returned by
    ``bundle_biomass_components``.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    list
        IDs of essential metabolites missing from the biomass reaction. A
        metabolite that exists in the model is reported with its model
        identifier; a metabolite that cannot be found in the model is
        reported with the MetaNetX identifier that was searched for.

    Notes
    -----
    "Answering the question of what to include in the core of a biomass
    objective function is not always straightforward. One example is
    different nucleotide forms, which, although inter-convertible, are
    essential for cellular chemistry. We propose here that all essential and
    irreplaceable molecules for metabolism should be included in the biomass
    functions of genome scale metabolic models. In the special case of
    cofactors, when two forms of the same cofactor take part in the same
    reactions (such as NAD and NADH), only one form could be included for the
    sake of simplicity. When a class of cofactors includes active and
    non-active interconvertible forms, the active forms should be preferred.
    [1]_."

    References
    ----------
    .. [1] Xavier, J. C., Patil, K. R., & Rocha, I. (2017). Integration of
       Biomass Formulations of Genome-Scale Metabolic Models with
       Experimental Data Reveals Universally Essential Cofactors in
       Prokaryotes. Metabolic Engineering, 39(October 2016), 200–208.
       http://doi.org/10.1016/j.ymben.2016.12.002

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    # Pool the metabolites of every reaction that is part of the biomass
    # equation (a single lumped reaction or several component reactions).
    known_precursors = set()
    for component_rxn in bundle_biomass_components(model, reaction):
        known_precursors.update(component_rxn.metabolites)
    absent = []
    for mnx_id in ESSENTIAL_PRECURSOR_IDS:
        try:
            candidate = helpers.find_met_in_model(
                model, mnx_id, compartment)[0]
        except RuntimeError:
            # The lookup failed, so only the MetaNetX ID can be reported.
            absent.append(mnx_id)
            continue
        if candidate not in known_precursors:
            absent.append(candidate.id)
    return absent
def find_biomass_precursors(model, reaction):
    """
    Return a list of all biomass precursors excluding ATP and H2O.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    list
        The reactants of the biomass reaction without the metabolites found
        for "MNXM3" and "MNXM2" in the compartment resolved for ``'c'``. The
        list is built from a set, so its order is arbitrary.

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    energy_mets = set()
    for mnx_id in ("MNXM3", "MNXM2"):
        try:
            energy_mets.add(
                helpers.find_met_in_model(model, mnx_id, compartment)[0])
        except RuntimeError:
            # A metabolite that cannot be located has nothing to exclude.
            continue
    return list(set(reaction.reactants).difference(energy_mets))
def gam_in_biomass(model, reaction):
    """
    Return boolean if biomass reaction includes growth-associated maintenance.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    bool
        True if the metabolites found for "MNXM3" and "MNXM2" are all
        reactants and those found for "MNXM7", "MNXM1" and "MNXM9" are all
        products of the reaction. False otherwise, including when any of the
        lookups raises a RuntimeError.

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')

    def lookup(mnx_id):
        return helpers.find_met_in_model(model, mnx_id, compartment)[0]

    try:
        consumed = {lookup("MNXM3"), lookup("MNXM2")}
        produced = {lookup("MNXM7"), lookup("MNXM1"), lookup("MNXM9")}
    except RuntimeError:
        return False
    if not consumed.issubset(set(reaction.reactants)):
        return False
    return produced.issubset(set(reaction.products))
def gam_in_biomass(model, reaction):
    """
    Check whether growth-associated maintenance is part of the biomass reaction.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    boolean
        True if the biomass reaction includes ATP and H2O as reactants and
        ADP, Pi and H as products, False otherwise. False is also returned
        when one of the metabolite lookups raises a RuntimeError.

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    substrate_ids = ("MNXM3", "MNXM2")
    product_ids = ("MNXM7", "MNXM1", "MNXM9")
    expected_substrates = set()
    expected_products = set()
    try:
        # Same lookup order as before: substrates first, then products.
        for mnx_id in substrate_ids:
            expected_substrates.add(
                helpers.find_met_in_model(model, mnx_id, compartment)[0])
        for mnx_id in product_ids:
            expected_products.add(
                helpers.find_met_in_model(model, mnx_id, compartment)[0])
    except RuntimeError:
        return False
    return (
        expected_substrates <= set(reaction.reactants)
        and expected_products <= set(reaction.products))
def find_biomass_precursors(model, reaction):
    """
    Collect the biomass precursors, leaving out ATP and H2O.

    Parameters
    ----------
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.
    model : cobra.Model
        The metabolic model under investigation.

    Returns
    -------
    list
        Metabolite objects that are reactants of the biomass reaction
        excluding ATP and H2O. The order of the list is arbitrary because it
        is derived from a set.

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    excluded = []
    for mnx_id in ("MNXM3", "MNXM2"):
        try:
            hits = helpers.find_met_in_model(model, mnx_id, compartment)
        except RuntimeError:
            # Not present in the model, hence nothing to remove.
            pass
        else:
            excluded.append(hits[0])
    precursors = set(reaction.reactants) - set(excluded)
    return list(precursors)
def test_find_compartment_id_in_model(model, compartment_id, expected):
    """Expect the lookup to return the anticipated compartment ID."""
    # The arguments are presumably supplied by a parametrization or fixture
    # that is not visible in this part of the file.
    assert (
        helpers.find_compartment_id_in_model(model, compartment_id)
        == expected)
def find_ngam(model):
    u"""
    Return all potential non growth-associated maintenance reactions.

    Starting from every reaction that converts ATP to ADP, keep the ones that
    are irreversible and whose reactants and products are exactly the
    expected hydrolysis metabolites in the compartment that
    ``helpers.find_compartment_id_in_model`` resolves for ``'c'``. If some of
    those reactions carry a maintenance-related keyword in their name, only
    these are returned; otherwise all of them are.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.

    Returns
    -------
    list
        Reactions that qualify as non-growth associated maintenance
        reactions.

    Notes
    -----
    [1]_ define the non-growth associated maintenance (NGAM) as the energy
    required to maintain all constant processes such as turgor pressure and
    other housekeeping activities. In metabolic models this is expressed by
    requiring a simple ATP hydrolysis reaction to always have a fixed minimal
    amount of flux. This value can be measured as described by [1]_ .

    Errors raised by the metabolite lookups are not handled here and reach
    the caller.

    References
    ----------
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
       generating a high-quality genome-scale metabolic reconstruction.
       Nature protocols. Nature Publishing Group.
       http://doi.org/10.1038/nprot.2009.203

    """
    atp_to_adp_rxns = helpers.find_converting_reactions(
        model, ("MNXM3", "MNXM7"))
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    expected_lhs = {
        helpers.find_met_in_model(model, "MNXM3", compartment)[0],
        helpers.find_met_in_model(model, "MNXM2", compartment)[0],
    }
    expected_rhs = {
        helpers.find_met_in_model(model, "MNXM7", compartment)[0],
        helpers.find_met_in_model(model, "MNXM1", compartment)[0],
        helpers.find_met_in_model(model, "MNXM9", compartment)[0],
    }
    hydrolysis_rxns = []
    for rxn in atp_to_adp_rxns:
        if rxn.reversibility is not False:
            continue
        if set(rxn.reactants) != expected_lhs:
            continue
        if set(rxn.products) != expected_rhs:
            continue
        hydrolysis_rxns.append(rxn)
    keywords = [
        'maintenance', 'atpm', 'requirement', 'ngam', 'non-growth',
        'associated'
    ]
    named_rxns = [
        rxn for rxn in hydrolysis_rxns
        if any(word in filter_none(rxn.name, '').lower()
               for word in keywords)
    ]
    # Prefer the name-based hits; fall back to every stoichiometric match.
    return named_rxns or hydrolysis_rxns
def test_find_compartment_id_in_model_exceptions(model, compartment_id):
    """
    Call the compartment lookup with inputs that are expected to fail.

    The body contains no assertion of its own.
    NOTE(review): judging by the test name, an exception is presumably
    expected and checked by a decorator or marker that is not visible in
    this part of the file - confirm.
    """
    helpers.find_compartment_id_in_model(model, compartment_id)
def find_direct_metabolites(model, reaction, tolerance=1E-06):
    """
    Return list of possible direct biomass precursor metabolites.

    Direct metabolites are metabolites that take part only in transport
    and/or boundary reactions AND the biomass reaction(s), but not in any
    purely metabolic reaction.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.Reaction
        The biomass reaction of the model under investigation.
    tolerance : float, optional
        Tolerance below which values will be regarded as zero.

    Returns
    -------
    list
        Metabolites that qualify as direct metabolites i.e. biomass
        precursors that are taken up to be consumed by the biomass reaction
        only.

    Raises
    ------
    KeyError, RuntimeError
        Re-raised with a descriptive message when determining the precursors
        or the 'c' and 'e' compartments fails with that exception type.
    OptimizationError
        When the objective value of the optimized model is zero within
        `tolerance`.

    """
    biomass_rxns = set(helpers.find_biomass_reaction(model))
    interchange_rxns = helpers.find_interchange_biomass_reactions(
        model, biomass_rxns)
    log_msg = ("Failed to properly identify cytosolic and extracellular "
               "compartments.")
    error_msg = ("The cytosolic and/or extracellular "
                 "compartments could not be identified.")
    try:
        precursors = find_biomass_precursors(model, reaction)
        main_comp = helpers.find_compartment_id_in_model(model, 'c')
        ext_space = helpers.find_compartment_id_in_model(model, 'e')
    except KeyError:
        LOGGER.error(log_msg)
        raise_with_traceback(KeyError(error_msg))
    except RuntimeError:
        LOGGER.error(log_msg)
        raise_with_traceback(RuntimeError(error_msg))

    # Precursors whose reactions are all transport, boundary or biomass.
    candidate_mets = [
        met for met in precursors
        if met.reactions.issubset(interchange_rxns)]
    linked_rxns = set()
    for met in candidate_mets:
        linked_rxns.update(
            rxn for rxn in met.reactions if rxn not in biomass_rxns)

    solution = model.optimize(raise_error=True)
    if np.isclose(solution.objective_value, 0, atol=tolerance):
        LOGGER.error("Failed to generate a non-zero objective value with "
                     "flux balance analysis.")
        raise OptimizationError(
            "The flux balance analysis on this model returned an "
            "objective value of zero. Make sure the model can "
            "grow! Check if the constraints are not too strict!")

    fluxes_by_rxn = {rxn: solution[rxn.id] for rxn in linked_rxns}
    flux_sum_by_met = {met: 0 for met in candidate_mets}
    return detect_false_positive_direct_metabolites(
        candidate_mets, biomass_rxns, main_comp, ext_space,
        fluxes_by_rxn, flux_sum_by_met)
def find_ngam(model):
    u"""
    Find candidate non growth-associated maintenance (NGAM) reactions.

    Among all reactions converting ATP to ADP, select the irreversible ones
    whose reactant and product sets equal the expected hydrolysis metabolites
    located in the compartment that ``helpers.find_compartment_id_in_model``
    resolves for ``'c'``. The selection is narrowed to reactions with a
    maintenance-related keyword in their name whenever at least one such
    reaction exists.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.

    Returns
    -------
    list
        Reactions that qualify as non-growth associated maintenance
        reactions.

    Notes
    -----
    [1]_ define the non-growth associated maintenance (NGAM) as the energy
    required to maintain all constant processes such as turgor pressure and
    other housekeeping activities. In metabolic models this is expressed by
    requiring a simple ATP hydrolysis reaction to always have a fixed minimal
    amount of flux. This value can be measured as described by [1]_ .

    Errors raised by the metabolite lookups are not caught in this function.

    References
    ----------
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
       generating a high-quality genome-scale metabolic reconstruction.
       Nature protocols. Nature Publishing Group.
       http://doi.org/10.1038/nprot.2009.203

    """
    conversions = helpers.find_converting_reactions(
        model, ("MNXM3", "MNXM7"))
    compartment = helpers.find_compartment_id_in_model(model, 'c')

    def located(mnx_id):
        return helpers.find_met_in_model(model, mnx_id, compartment)[0]

    lhs = {located("MNXM3"), located("MNXM2")}
    rhs = {located("MNXM7"), located("MNXM1"), located("MNXM9")}

    def is_hydrolysis(rxn):
        return (rxn.reversibility is False
                and set(rxn.reactants) == lhs
                and set(rxn.products) == rhs)

    matches = [rxn for rxn in conversions if is_hydrolysis(rxn)]
    hints = ['maintenance', 'atpm', 'requirement', 'ngam', 'non-growth',
             'associated']
    by_name = []
    for rxn in matches:
        label = filter_none(rxn.name, '').lower()
        if any(hint in label for hint in hints):
            by_name.append(rxn)
    if by_name:
        return by_name
    return matches
def find_direct_metabolites(model, reaction, tolerance=1E-06):
    """
    Identify biomass precursors that may be direct metabolites.

    The term direct metabolites describes metabolites that are involved only
    in either transport and/or boundary reactions, AND the biomass
    reaction(s), but not in any purely metabolic reactions.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.Reaction
        The biomass reaction of the model under investigation.
    tolerance : float, optional
        Tolerance below which values will be regarded as zero.

    Returns
    -------
    list
        Metabolites that qualify as direct metabolites i.e. biomass
        precursors that are taken up to be consumed by the biomass reaction
        only.

    Raises
    ------
    KeyError, RuntimeError
        Raised anew, with an explanatory message, when finding the
        precursors or the 'c' and 'e' compartments fails with an exception
        of that type.
    OptimizationError
        When optimizing the model yields an objective value of zero within
        `tolerance`.

    """
    biomass_rxns = set(helpers.find_biomass_reaction(model))
    allowed_rxns = helpers.find_interchange_biomass_reactions(
        model, biomass_rxns)
    try:
        precursors = find_biomass_precursors(model, reaction)
        main_comp = helpers.find_compartment_id_in_model(model, 'c')
        ext_space = helpers.find_compartment_id_in_model(model, 'e')
    except (KeyError, RuntimeError) as error:
        LOGGER.error("Failed to properly identify cytosolic and extracellular "
                     "compartments.")
        # Raise the same exception family that was caught; KeyError takes
        # precedence exactly as it would with two separate clauses.
        error_cls = KeyError if isinstance(error, KeyError) else RuntimeError
        raise_with_traceback(
            error_cls("The cytosolic and/or extracellular "
                      "compartments could not be identified."))

    direct_candidates = []
    for met in precursors:
        if met.reactions.issubset(allowed_rxns):
            direct_candidates.append(met)
    non_biomass_rxns = {
        rxn
        for met in direct_candidates
        for rxn in met.reactions
        if rxn not in biomass_rxns
    }

    solution = model.optimize(raise_error=True)
    objective_is_zero = np.isclose(
        solution.objective_value, 0, atol=tolerance)
    if objective_is_zero:
        LOGGER.error("Failed to generate a non-zero objective value with "
                     "flux balance analysis.")
        raise OptimizationError(
            "The flux balance analysis on this model returned an "
            "objective value of zero. Make sure the model can "
            "grow! Check if the constraints are not too strict!")

    rxn_fluxes = {}
    for rxn in non_biomass_rxns:
        rxn_fluxes[rxn] = solution[rxn.id]
    met_totals = dict.fromkeys(direct_candidates, 0)
    return detect_false_positive_direct_metabolites(
        direct_candidates, biomass_rxns, main_comp, ext_space, rxn_fluxes,
        met_totals)
def essential_precursors_not_in_biomass(model, reaction):
    u"""
    List the essential precursors that the biomass reaction does not contain.

    There are universal components of life that make up the biomass of all
    known organisms. These include all proteinogenic amino acids, deoxy- and
    ribonucleotides, water and a range of metabolic cofactors. Each entry of
    ``ESSENTIAL_PRECURSOR_IDS`` is searched in the compartment resolved for
    ``'c'`` and checked against the metabolites of the reactions returned by
    ``bundle_biomass_components``.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    list
        IDs of essential metabolites missing from the biomass reaction. The
        IDs appear in the namespace of the model if the metabolite exists,
        but use the MetaNetX namespace if the metabolite does not exist in
        the model.

    Notes
    -----
    "Answering the question of what to include in the core of a biomass
    objective function is not always straightforward. One example is
    different nucleotide forms, which, although inter-convertible, are
    essential for cellular chemistry. We propose here that all essential and
    irreplaceable molecules for metabolism should be included in the biomass
    functions of genome scale metabolic models. In the special case of
    cofactors, when two forms of the same cofactor take part in the same
    reactions (such as NAD and NADH), only one form could be included for the
    sake of simplicity. When a class of cofactors includes active and
    non-active interconvertible forms, the active forms should be preferred.
    [1]_."

    Please note that [1]_ also suggest counting C1 carriers (derivatives of
    tetrahydrofolate(B9) or tetrahydromethanopterin) as universal cofactors.
    They are omitted from this check because many individual compounds
    classify as C1 carriers, and it is not clear a priori which one should be
    preferred. A future update may identify these using a chemical ontology.

    References
    ----------
    .. [1] Xavier, J. C., Patil, K. R., & Rocha, I. (2017). Integration of
       Biomass Formulations of Genome-Scale Metabolic Models with
       Experimental Data Reveals Universally Essential Cofactors in
       Prokaryotes. Metabolic Engineering, 39(October 2016), 200–208.
       http://doi.org/10.1016/j.ymben.2016.12.002

    """
    compartment = helpers.find_compartment_id_in_model(model, 'c')
    biomass_rxns = bundle_biomass_components(model, reaction)
    in_biomass = {met for rxn in biomass_rxns for met in rxn.metabolites}
    not_in_biomass = []
    for mnx_id in ESSENTIAL_PRECURSOR_IDS:
        try:
            found = helpers.find_met_in_model(model, mnx_id, compartment)[0]
        except RuntimeError:
            # Unknown to the model: report the MetaNetX identifier itself.
            not_in_biomass.append(mnx_id)
        else:
            if found not in in_biomass:
                not_in_biomass.append(found.id)
    return not_in_biomass
def bundle_biomass_components(model, reaction):
    """
    Return bundle biomass component reactions if it is not one lumped reaction.

    There are two basic ways of specifying the biomass composition. The most
    common is a single lumped reaction containing all biomass precursors.
    Alternatively, the biomass equation can be split into several reactions
    each focusing on a different macromolecular component for instance
    a (1 gDW ash) + b (1 gDW phospholipids) + c (free fatty acids)+
    d (1 gDW carbs) + e (1 gDW protein) + f (1 gDW RNA) + g (1 gDW DNA) +
    h (vitamins/cofactors) + xATP + xH2O-> 1 gDCW biomass + xADP + xH + xPi.
    This function decides whether the given biomass reaction is a lumped
    all-in-one reaction or merely the final reaction composing all
    macromolecular components. Knowing which other reactions belong to a
    given biomass reaction is required to identify universal biomass
    components or to calculate detailed precursor stoichiometries.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    list
        One or more reactions that qualify as THE biomass equation together.

    Notes
    -----
    Counting H2O, ADP, Pi, H, and ATP, the amount of metabolites in a split
    reaction is comparatively low:
    Any reaction with less or equal to 15 metabolites can
    probably be counted as a split reaction containing Ash, Phospholipids,
    Fatty Acids, Carbohydrates (i.e. cell wall components), Protein, RNA,
    DNA, Cofactors and Vitamins, and Small Molecules. Any reaction with more
    than or equal to 28 metabolites, however, (21 AA + 3 Nucleotides (4-ATP)
    + 4 Deoxy-Nucleotides) can be considered a lumped reaction.
    Anything in between will be treated conservatively as a lumped reaction.
    For split reactions, after removing any of the metabolites associated
    with growth-associated energy expenditure (H2O, ADP, Pi, H, and ATP), the
    only remaining metabolites should be generalized macromolecule precursors
    e.g. Protein, Phospholipids etc. Each of these have their own composing
    reactions. Hence we include the reactions of these metabolites in the set
    that ultimately makes up the returned list of reactions that together
    make up the biomass equation.

    """
    # Sixteen or more metabolites: treat the reaction as lumped.
    if len(reaction.metabolites) > 15:
        return [reaction]

    compartment = helpers.find_compartment_id_in_model(model, 'c')
    energy_ids = ["MNXM3", "MNXM2", "MNXM7", "MNXM1", 'MNXM9']
    try:
        energy_mets = {
            helpers.find_met_in_model(model, mnx_id, compartment)[0]
            for mnx_id in energy_ids
        }
    except RuntimeError:
        # A single failed lookup discards all energy metabolites.
        energy_mets = set()

    pattern = re.compile(
        '^{}(_[a-zA-Z]+?)*?$'.format('biomass'), re.IGNORECASE)
    biomass_mets = set(model.metabolites.query(pattern))

    macromolecules = set(reaction.metabolites).difference(
        energy_mets).difference(biomass_mets)
    component_rxns = set()
    for macromolecule in macromolecules:
        component_rxns.update(macromolecule.reactions)
    return list(component_rxns)
def detect_energy_generating_cycles(model, metabolite_id):
    u"""
    Detect erroneous energy-generating cycles for a single metabolite.

    The function first builds a dissipation reaction corresponding to the
    input metabolite. After closing the model boundaries with
    ``helpers.close_boundaries_sensibly``, this reaction is added to the
    model and set as the objective for optimization. If the reaction is able
    to carry flux, an erroneous energy-generating cycle must be present.

    All modifications (closed boundaries, the added dissipation reaction and
    the changed objective) are made inside a model context and are reverted
    before the function returns, so the model can be reused for further
    metabolites.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    metabolite_id : str
        The identifier of an energy metabolite. It is also used as the key
        into ``ENERGY_COUPLES`` to find the dissipation product.

    Returns
    -------
    list
        The entries of the flux index of the solution, i.e., the reactions
        that carry a non-zero flux, without the dissipation reaction itself,
        if the objective value is greater than zero. An empty list otherwise.

    Notes
    -----
    "[...] energy generating cycles (EGC) [...] charge energy metabolites
    without a source of energy. [...] To efficiently identify the existence
    of diverse EGCs, we first add a dissipation reaction to the metabolic
    network for each metabolite used to transmit cellular energy; e.g., for
    ATP, the irreversible reaction ATP + H2O → ADP + P + H+ is added. These
    dissipation reactions close any existing energy-generating cycles,
    thereby converting them to type-III pathways. Fluxes through any of the
    dissipation reactions at steady state indicate the generation of energy
    through the metabolic network. Second, all uptake reactions are
    constrained to zero. The sum of the fluxes through the energy dissipation
    reactions is now maximized using FBA. For a model without EGCs, these
    reactions cannot carry any flux without the uptake of nutrients. [1]_."

    Errors raised by the metabolite lookups or by
    ``model.optimize(raise_error=True)`` are not handled here and propagate
    to the caller.

    References
    ----------
    .. [1] Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., &
       Lercher, M. J. (2017). Erroneous energy-generating cycles in published
       genome scale metabolic networks: Identification and removal. PLoS
       Computational Biology, 13(4), 1–14.
       http://doi.org/10.1371/journal.pcbi.1005494

    """
    main_comp = helpers.find_compartment_id_in_model(model, 'c')

    def in_main_comp(mnx_id):
        return helpers.find_met_in_model(model, mnx_id, main_comp)[0]

    met = in_main_comp(metabolite_id)

    dissipation_rxn = Reaction('Dissipation')
    if metabolite_id in ['MNXM3', 'MNXM63', 'MNXM51', 'MNXM121', 'MNXM423']:
        # build nucleotide-type dissipation reaction
        dissipation_rxn.add_metabolites({
            in_main_comp('MNXM2'): -1,
            in_main_comp('MNXM1'): 1,
            in_main_comp('MNXM9'): 1,
        })
    elif metabolite_id in ['MNXM6', 'MNXM10']:
        # build nicotinamide-type dissipation reaction
        dissipation_rxn.add_metabolites({in_main_comp('MNXM1'): 1})
    elif metabolite_id in [
            'MNXM38', 'MNXM208', 'MNXM191', 'MNXM223', 'MNXM7517',
            'MNXM12233', 'MNXM558']:
        # build redox-partner-type dissipation reaction
        dissipation_rxn.add_metabolites({in_main_comp('MNXM1'): 2})
    elif metabolite_id == 'MNXM21':
        dissipation_rxn.add_metabolites({
            in_main_comp('MNXM2'): -1,
            in_main_comp('MNXM1'): 1,
            in_main_comp('MNXM26'): 1,
        })
    elif metabolite_id == 'MNXM89557':
        dissipation_rxn.add_metabolites({
            in_main_comp('MNXM2'): -1,
            in_main_comp('MNXM1'): 2,
            in_main_comp('MNXM15'): 1,
        })

    dissipation_product = in_main_comp(ENERGY_COUPLES[metabolite_id])
    dissipation_rxn.add_metabolites({met: -1, dissipation_product: 1})

    # Work inside a model context so that closing the boundaries, adding the
    # 'Dissipation' reaction and replacing the objective do not leak into the
    # caller's model. Without this, a second call on the same model would
    # find a stale 'Dissipation' reaction from the previous metabolite.
    with model:
        helpers.close_boundaries_sensibly(model)
        model.add_reactions([dissipation_rxn])
        model.objective = dissipation_rxn
        solution = model.optimize(raise_error=True)
        # Extract the result before the context is left and changes revert.
        if solution.objective_value > 0.0:
            active = solution.fluxes[solution.fluxes.abs() > 0.0].index
            return active.drop(["Dissipation"]).tolist()
        return []
def bundle_biomass_components(model, reaction):
    """
    Gather the reactions that jointly form the biomass equation.

    There are two basic ways of specifying the biomass composition. The most
    common is a single lumped reaction containing all biomass precursors.
    Alternatively, the biomass equation can be split into several reactions
    each focusing on a different macromolecular component for instance
    a (1 gDW ash) + b (1 gDW phospholipids) + c (free fatty acids)+
    d (1 gDW carbs) + e (1 gDW protein) + f (1 gDW RNA) + g (1 gDW DNA) +
    h (vitamins/cofactors) + xATP + xH2O-> 1 gDCW biomass + xADP + xH + xPi.
    This function aims to identify if the given biomass reaction 'reaction',
    is a lumped all-in-one reaction, or whether it is just the final
    composing reaction of all macromolecular components. It is important to
    identify which other reactions belong to a given biomass reaction to be
    able to identify universal biomass components or calculate detailed
    precursor stoichiometries.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.core.reaction.Reaction
        The biomass reaction of the model under investigation.

    Returns
    -------
    list
        One or more reactions that qualify as THE biomass equation together.
        A reaction with 16 or more metabolites is returned on its own.

    Notes
    -----
    Counting H2O, ADP, Pi, H, and ATP, the amount of metabolites in a split
    reaction is comparatively low:
    Any reaction with less or equal to 15 metabolites can
    probably be counted as a split reaction containing Ash, Phospholipids,
    Fatty Acids, Carbohydrates (i.e. cell wall components), Protein, RNA,
    DNA, Cofactors and Vitamins, and Small Molecules. Any reaction with more
    than or equal to 28 metabolites, however, (21 AA + 3 Nucleotides (4-ATP)
    + 4 Deoxy-Nucleotides) can be considered a lumped reaction.
    Anything in between will be treated conservatively as a lumped reaction.
    For split reactions, after removing any of the metabolites associated
    with growth-associated energy expenditure (H2O, ADP, Pi, H, and ATP), the
    only remaining metabolites should be generalized macromolecule precursors
    e.g. Protein, Phospholipids etc. Each of these have their own composing
    reactions. Hence we include the reactions of these metabolites in the set
    that ultimately makes up the returned list of reactions that together
    make up the biomass equation.

    """
    is_lumped = len(reaction.metabolites) >= 16
    if is_lumped:
        return [reaction]

    compartment = helpers.find_compartment_id_in_model(model, 'c')
    try:
        gam = set()
        for mnx_id in ["MNXM3", "MNXM2", "MNXM7", "MNXM1", 'MNXM9']:
            gam.add(
                helpers.find_met_in_model(model, mnx_id, compartment)[0])
    except RuntimeError:
        # Keep the all-or-nothing behavior: one miss empties the whole set.
        gam = set()

    biomass_regex = re.compile(
        '^{}(_[a-zA-Z]+?)*?$'.format('biomass'), re.IGNORECASE)
    to_ignore = gam | set(model.metabolites.query(biomass_regex))
    macromolecules = set(reaction.metabolites) - to_ignore

    # Union of the reactions of every remaining macromolecule precursor.
    bundled = set().union(*[set(met.reactions) for met in macromolecules])
    return list(bundled)