def lp_sampler(model, n_samples=1000, weights=None, constraints=None, select_probability=0.01,
               futile_cycle_threshold=1e2, variation_threshold=1e-4, merge_keys=False, verbose=True):
    """ Sample flux distributions by solving pFBA with randomly drawn linear objectives.

    Args:
        model: model to sample
        n_samples (int): number of optimization problems to solve (default: 1000)
        weights (dict): reaction id -> weight; when empty or None the weights are
            derived from FVA as 1 / (ub - lb), keeping only reactions whose flux
            range lies strictly between *variation_threshold* and *futile_cycle_threshold*
        constraints (dict): additional constraints passed to FVA and pFBA (optional)
        select_probability (float): probability for each weighted reaction to enter
            the objective of a given sample (default: 0.01)
        futile_cycle_threshold (float): upper limit on the flux range (default: 100)
        variation_threshold (float): lower limit on the flux range (default: 1e-4)
        merge_keys (bool): return one list of values per reaction instead of a
            list of samples (default: False)
        verbose (bool): print the number of successful samples (default: True)

    Returns:
        list: values of every optimal solution, or (if merge_keys=True) an
        OrderedDict from reaction id to the list of sampled values
    """
    if not weights:
        flux_ranges = FVA(model, constraints=constraints)
        weights = {}
        for r_id, (lb, ub) in flux_ranges.items():
            if ub is None or lb is None:
                continue
            if variation_threshold < (ub - lb) < futile_cycle_threshold:
                weights[r_id] = 1.0 / (ub - lb)

    collected = []
    solver = solver_instance(model)

    for _ in range(n_samples):
        # random() is drawn for every reaction, gauss() only for the selected ones
        objective = {}
        for r_id, weight in weights.items():
            if random() < select_probability:
                objective[r_id] = gauss(0, 1) * weight

        sol = pFBA(model, objective=objective, constraints=constraints, solver=solver)

        if sol.status == Status.OPTIMAL:
            collected.append(sol.values)

    if verbose:
        print('Sampling success rate: {} (of {})'.format(len(collected), n_samples))

    if not merge_keys:
        return collected

    per_reaction = OrderedDict()
    for r_id in model.reactions:
        per_reaction[r_id] = [sample[r_id] for sample in collected]
    return per_reaction
def build_ensemble(model, reaction_scores, size, outputfile=None, flavor=None, init_env=None):
    """ Reconstruct a model ensemble using the CarveMe approach.

    Args:
        model (CBModel): universal model
        reaction_scores (dict): reaction scores
        size (int): ensemble size
        outputfile (str): write model to SBML file (optional)
        flavor (str): SBML flavor ('cobra' or 'fbc2', optional)
        init_env (Environment): initialize final model with given Environment (optional)

    Returns:
        EnsembleModel: reconstructed ensemble

    Notes:
        *reaction_scores* is indexed with column labels ('reaction',
        'normalized_score', 'GPR') and iterated with ``iterrows``.
        Iterations for which the MILP is not solved to optimality are dropped,
        so the resulting ensemble may hold fewer than *size* members.
    """
    scores = dict(reaction_scores[['reaction', 'normalized_score']].values)
    unscored = [r_id for r_id in model.reactions
                if r_id not in scores and not r_id.startswith('R_EX')]
    # spread of the positive scores (log scale), used to draw random penalties
    logstd = np.std(np.log([x for x in scores.values() if x > 0]))

    reaction_status = {r_id: [] for r_id in model.reactions}
    solver = solver_instance(model)
    failed = 0

    for _ in range(size):
        # unscored reactions get a random negative score; real scores take precedence
        random_scores = -np.exp(logstd * np.random.randn(len(unscored)))
        all_scores = dict(zip(unscored, random_scores))
        all_scores.update(scores)

        sol = minmax_reduction(model, all_scores, solver=solver)

        if sol.status == Status.OPTIMAL:
            for r_id in model.reactions:
                active = (abs(sol.values[r_id]) >= 1e-6
                          or (sol.values.get('yf_' + r_id, 0) > 0.5)
                          or (sol.values.get('yr_' + r_id, 0) > 0.5))
                reaction_status[r_id].append(active)
        else:
            failed += 1

    ensemble_size = size - failed
    ensemble = EnsembleModel(model, ensemble_size, reaction_status)
    ensemble.simplify()

    for _, row in reaction_scores.iterrows():
        r_id = row['reaction']
        if r_id in ensemble.model.reactions:
            gpr = parse_gpr_rule(row['GPR'])
            ensemble.model.reactions[r_id].set_gpr_association(gpr)

    if init_env:
        init_env.apply(ensemble.model, inplace=True, warning=False)

    if outputfile:
        cleanup_metadata(ensemble.model)
        save_ensemble(ensemble, outputfile, flavor=flavor)

    # The docstring has always promised the ensemble; previously nothing was returned.
    return ensemble
def benchmark_build_problem(modelpath, n=10):
    """ Time how long it takes to build the solver problem *n* times.

    Args:
        modelpath (str): path of the SBML model to load
        n (int): number of solver instances to build (default: 10)

    The header and the elapsed wall-clock time are written to stdout on a
    single line.
    """
    import sys

    model = load_sbml_model(modelpath, GPR_CONSTRAINED)
    fix_bigg_model(model)

    # Portable replacement for the Python 2 "print ..., " (no newline) statement.
    sys.stdout.write('benchmarking build problem for {} instances: '.format(n))
    sys.stdout.flush()

    tstart = time()

    for _ in range(n):
        solver = solver_instance()
        solver.build_problem(model)

    tend = time()
    print('took {}'.format(tend - tstart))
def build_reaction_solution_pool(model, rxns2rxns, constraints=None):
    """ Evaluate every distinct reaction set referenced by *rxns2rxns*.

    Args:
        model: model used to build the solver problem
        rxns2rxns (dict): mapping whose values are collections of (hashable)
            reaction sets
        constraints (dict): constraints forwarded to the validation (optional)

    Returns:
        dict: reaction set -> result of ``validate_reaction_deletions``
        (empty when *rxns2rxns* is empty)
    """
    # set().union(*...) also handles an empty mapping and frozenset values,
    # both of which made reduce(set.__or__, ...) raise.
    reaction_sets = set().union(*rxns2rxns.values())

    solver = solver_instance()
    solver.build_problem(model)

    return {
        reaction_set: validate_reaction_deletions(model, reaction_set, constraints, solver)
        for reaction_set in reaction_sets
    }
def build_reaction_solution_pool2(model, gene2rxns, constraints=None):
    """ Evaluate every distinct reaction set that appears as a value of *gene2rxns*.

    Args:
        model: model used to build the solver problem
        gene2rxns (dict): mapping whose values are (hashable) reaction sets
        constraints (dict): constraints forwarded to the validation (optional)

    Returns:
        dict: reaction set -> result of ``validate_reaction_deletions``
    """
    solver = solver_instance()
    solver.build_problem(model)

    # duplicates are collapsed so that each reaction set is validated once
    unique_sets = set(gene2rxns.values())

    return {
        rxn_set: validate_reaction_deletions(model, rxn_set, constraints, solver)
        for rxn_set in unique_sets
    }
def benchmark_solving_stage(modelpath, n=10):
    """ Time *n* repeated FBA runs on an already built solver problem.

    Args:
        modelpath (str): path of the SBML model to load
        n (int): number of FBA repetitions (default: 10)

    Problem construction happens before the clock starts, so only the solving
    stage is measured. The header and the elapsed wall-clock time are written
    to stdout on a single line.
    """
    import sys

    model = load_sbml_model(modelpath, GPR_CONSTRAINED)
    fix_bigg_model(model)

    # Portable replacement for the Python 2 "print ..., " (no newline) statement.
    sys.stdout.write('benchmarking solving stage for {} repetitions: '.format(n))
    sys.stdout.flush()

    solver = solver_instance()
    solver.build_problem(model)

    tstart = time()

    for _ in range(n):
        FBA(model, solver=solver)

    tend = time()
    print('took {}'.format(tend - tstart))
def lp_sampler(model, n_samples=1000, weights=None, constraints=None, select_probability=0.01,
               futile_cycle_threshold=1e2, variation_threshold=1e-4, merge_keys=False, verbose=True):
    """ Generate flux samples from pFBA problems with random sparse objectives.

    Args:
        model: model to sample
        n_samples (int): how many random objectives to try (default: 1000)
        weights (dict): reaction id -> weight (optional). If not given, FVA is
            run and each reaction whose range (ub - lb) is strictly between
            *variation_threshold* and *futile_cycle_threshold* gets 1 / (ub - lb)
        constraints (dict): additional constraints for FVA and pFBA (optional)
        select_probability (float): chance of including a reaction in each
            objective (default: 0.01)
        futile_cycle_threshold (float): maximum accepted flux range (default: 100)
        variation_threshold (float): minimum accepted flux range (default: 1e-4)
        merge_keys (bool): group the sampled values by reaction (default: False)
        verbose (bool): report how many samples were optimal (default: True)

    Returns:
        list: one entry (solution values) per optimal sample; with
        merge_keys=True, an OrderedDict from reaction id to a list of values
    """
    if not weights:
        variability = FVA(model, constraints=constraints)

        def _usable(lb, ub):
            # bounded on both sides and neither fixed nor looking like a futile cycle
            return (ub is not None and lb is not None
                    and variation_threshold < (ub - lb) < futile_cycle_threshold)

        weights = {r_id: 1.0 / (ub - lb)
                   for r_id, (lb, ub) in variability.items()
                   if _usable(lb, ub)}

    solver = solver_instance(model)
    samples = []
    remaining = n_samples

    while remaining > 0:
        remaining -= 1

        objective = {}
        for r_id, w in weights.items():
            # the selection draw always happens before the coefficient draw
            if not random() < select_probability:
                continue
            objective[r_id] = gauss(0, 1) * w

        sol = pFBA(model, objective=objective, constraints=constraints, solver=solver)
        if sol.status == Status.OPTIMAL:
            samples.append(sol.values)

    if verbose:
        print('Sampling success rate: {} (of {})'.format(len(samples), n_samples))

    if merge_keys:
        by_reaction = OrderedDict()
        for r_id in model.reactions:
            by_reaction[r_id] = [values[r_id] for values in samples]
        samples = by_reaction

    return samples
def multiGapFill(model, universe, media, media_db, min_growth=0.1, max_uptake=10, scores=None, inplace=True, bigM=1e3,
                 exchange_format=None):
    """ Gap Fill a metabolic model for multiple environmental conditions

    Args:
        model (CBModel): original model
        universe (CBModel): universe model
        media (list): list of growth media ids
        media_db (dict): growth media database (see notes)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        scores (dict): reaction scores (optional, see *gapFill* for details)
        inplace (bool): modify given model in place (default: True)
        bigM (float): maximal reaction flux (default: 1000)
        exchange_format (str): forwarded unchanged to *medium_to_constraints* (optional)

    Returns:
        CBModel: gap filled model (the given model when inplace=True, a copy otherwise)

    Notes:
        *media_db* is a dict from medium name to the list of respective compounds.

        Media that are not present in *media_db* are reported on stdout and skipped.

        The *universe* model is modified: the lower bound of every exchange
        reaction ('R_EX' prefix) that is missing from *model* is set to 0.
    """
    if not inplace:
        model = model.copy()

    new_reactions = set(universe.reactions) - set(model.reactions)

    for r_id in new_reactions:
        if r_id.startswith('R_EX'):
            universe.set_lower_bound(r_id, 0)

    # One solver built on the merged model is shared by all gapFill calls below.
    merged_model = merge_models(model, universe, inplace=False)
    solver = solver_instance(merged_model)

    for medium_name in media:
        if medium_name in media_db:
            compounds = media_db[medium_name]
            constraints = medium_to_constraints(merged_model, compounds, max_uptake=max_uptake, inplace=False,
                                                exchange_format=exchange_format, verbose=False)

            gapFill(model, universe, constraints=constraints, min_growth=min_growth,
                    scores=scores, inplace=True, bigM=bigM, solver=solver, tag=medium_name)
        else:
            # single pre-formatted argument: same output on Python 2 and Python 3
            print('Medium {} not in database, ignored.'.format(medium_name))

    return model
def species_coupling_score(community, environment, min_growth=1.0, max_uptake=100, n_solutions=100):
    """
    Calculate frequency of community species dependency on each other

    Zelezniak A. et al, Metabolic dependencies drive species co-occurrence in diverse microbial communities (PNAS 2015)

    Args:
        community (Community): community object
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        min_growth (float): minimum growth rate (default: 1)
        max_uptake (float): maximum uptake rate (default: 100)
        n_solutions (int): How many unique solutions to calculate (default: 100)

    Returns:
        dict: Keys are dependent model names, values are dictionaries with required species frequencies
            (None when no solution was found for that species)
        dict: Extra information ('dependencies': the donor lists found for each species)
    """
    interacting_community = community.copy(copy_models=False, interacting=True, create_biomass=False,
                                           merge_extracellular_compartments=False)
    environment.apply(interacting_community.merged, inplace=True)

    # dict.values()/items() and range() work on both Python 2 and Python 3
    for b in interacting_community.organisms_biomass_reactions.values():
        interacting_community.merged.reactions[b].lb = 0

    solver = solver_instance(interacting_community.merged)

    # One binary switch per organism: all of its reactions are tied to y_<org>.
    for org_id, rxns in interacting_community.organisms_reactions.items():
        org_var = 'y_{}'.format(org_id)
        solver.add_variable(org_var, 0, 1, vartype=VarType.BINARY, update_problem=False)
        for r_id in rxns:
            lb = min_growth if r_id == interacting_community.organisms_biomass_reactions[org_id] else -max_uptake
            solver.add_constraint('c_{}_lb'.format(r_id), {r_id: 1, org_var: -lb}, '>', 0, update_problem=False)
            solver.add_constraint('c_{}_ub'.format(r_id), {r_id: 1, org_var: -max_uptake}, '<', 0, update_problem=False)

    solver.update()

    scores = {}
    extras = {'dependencies': {}}

    for org_id, biomass_id in interacting_community.organisms_biomass_reactions.items():
        other_biomasses = {o for o in interacting_community.organisms if o != org_id}
        solver.add_constraint('SMETANA_Biomass', {biomass_id: 1}, '>', min_growth)
        objective = {"y_{}".format(o): 1.0 for o in other_biomasses}

        previous_constraints = []
        donors_list = []

        for i in range(n_solutions):
            sol = solver.solve(objective, minimize=True, get_values=True)

            if sol.status != Status.OPTIMAL:
                if i == 0:
                    donors_list = None  # species can not grow
                break

            # NOTE(review): relies on truthiness of the binary value returned by the solver
            donors = [o for o in other_biomasses if sol.values["y_{}".format(o)]]
            donors_list.append(donors)

            # exclude this exact donor combination from the following iterations
            previous_con = 'iteration_{}'.format(i)
            previous_constraints.append(previous_con)
            previous_sol = {"y_{}".format(o): 1 for o in donors}
            solver.add_constraint(previous_con, previous_sol, '<', len(previous_sol) - 1)

        # restore the shared solver before moving to the next organism
        solver.remove_constraint('SMETANA_Biomass')
        for con in previous_constraints:
            solver.remove_constraint(con)

        if donors_list:
            donors_list_n = float(len(donors_list))
            donors_counter = Counter(chain(*donors_list))
            scores[org_id] = {o: donors_counter[o] / donors_list_n for o in other_biomasses}
            extras['dependencies'][org_id] = donors_list
        else:
            scores[org_id] = None
            extras['dependencies'][org_id] = donors_list

    return scores, extras
def arFBA(model, reference_constraints, perturbed_constraints, weights):
    """ Run an allosteric regulated FBA (arFBA) simulation

    A reference flux distribution is computed with pFBA under
    *reference_constraints*; a second problem is then built on the perturbed
    (irreversible) model, with auxiliary variables that link relative flux
    changes of regulated reactions to relative turnover changes of their
    regulators.

    Args:
        model: an instance of AllostericModel (generated by the SBML loader)
        reference_constraints: dict of reaction id to (lower bound, upper bound)
        perturbed_constraints: dict of reaction id to (lower bound, upper bound)
        weights: dict of (metabolite id, reaction id) to float
            (weighting factor for each allosteric interaction)

    Returns:
        flux distribution: dict of reaction id to float, or None when the
        solver status is not OPTIMAL
    """

    TOL = 1e-6  # reference fluxes / turnovers at or below this are ignored
    M = 1e3     # big-M coefficient coupling fluxes to their binary indicators

    # Reference state: fluxes (v0) and metabolite turnovers (t0).
    solution_ref = pFBA(model, constraints=reference_constraints)
    v0 = solution_ref.values
    t0 = compute_turnover(model, v0)

    model_irrev, mapping = build_perturbed_model(model, perturbed_constraints)
    solver = solver_instance()
    solver.build_problem(model_irrev)

    m_r_lookup = model_irrev.metabolite_reaction_lookup_table()
    reg_m_r_lookup = model_irrev.metabolite_reaction_regulatory_lookup_table()
    # metabolites with at least one regulatory target
    regulators = [m_id for m_id, targets in reg_m_r_lookup.items() if len(targets) > 0]

    # --- variables (all non-persistent, problem updated once at the end) ---

    # t_<m>: non-negative variable per regulator
    for m_id in regulators:
        solver.add_variable('t_' + m_id, 0, None, persistent=False, update_problem=False)

    # y_<fwd>, y_<bwd>: binary indicators for each pair in the mapping
    for fwd_id, bwd_id in mapping.values():
        solver.add_variable('y_' + fwd_id, vartype=VarType.BINARY, persistent=False, update_problem=False)
        solver.add_variable('y_' + bwd_id, vartype=VarType.BINARY, persistent=False, update_problem=False)

    # d+ / d-: non-negative deviation variables, only for interactions whose
    # reference flux and reference turnover are both above TOL
    for (m_id, r_id), kind in model.regulation.items():
        if v0[r_id] > TOL and t0[m_id] > TOL:
            diff_pos = 'd+_{}_{}'.format(m_id, r_id)
            diff_neg = 'd-_{}_{}'.format(m_id, r_id)
            solver.add_variable(diff_pos, 0, None, persistent=False, update_problem=False)
            solver.add_variable(diff_neg, 0, None, persistent=False, update_problem=False)

    solver.update()

    # --- constraints ---

    # ct_<m>: sum of fluxes with positive coefficient for m, minus t_<m>
    # (relation/rhs left at the solver's defaults)
    for m_id in regulators:
        lhs = {r_id: coeff for r_id, coeff in m_r_lookup[m_id].items() if coeff > 0}
        lhs['t_' + m_id] = -1
        solver.add_constraint('ct_' + m_id, lhs.items(), persistent=False, update_problem=False)

    # v - M*y '<' 0 for each direction, and y_fwd + y_bwd '<' 1
    for r_id, (fwd_id, bwd_id) in mapping.items():
        solver.add_constraint('c_' + fwd_id, {fwd_id: 1, 'y_' + fwd_id: -M}.items(), '<', 0, persistent=False, update_problem=False)
        solver.add_constraint('c_' + bwd_id, {bwd_id: 1, 'y_' + bwd_id: -M}.items(), '<', 0, persistent=False, update_problem=False)
        solver.add_constraint('rev_' + r_id, {'y_' + fwd_id: 1, 'y_' + bwd_id: 1}.items(), '<', 1, persistent=False, update_problem=False)

    # Deviation constraints. Fluxes are scaled by 1/v0 and turnovers by 1/t0.
    # kind == 1 uses right-hand side 0; any other kind uses -2 / 2 with the
    # sign of the turnover term flipped.
    # NOTE(review): presumably kind == 1 marks activation and other values
    # inhibition -- confirm against the AllostericModel definition.
    for (m_id, r_id), kind in model.regulation.items():
        if v0[r_id] > TOL and t0[m_id] > TOL:
            diff_pos = 'd+_{}_{}'.format(m_id, r_id)
            diff_neg = 'd-_{}_{}'.format(m_id, r_id)
            if r_id in mapping:
                # reaction was split: both direction variables enter the constraint
                fwd_id, bwd_id = mapping[r_id]
                if kind == 1:
                    lhs = {diff_pos: 1, fwd_id: -1/v0[r_id], bwd_id: -1/v0[r_id], 't_' + m_id: 1/t0[m_id]}
                    solver.add_constraint('c' + diff_pos, lhs.items(), '>', 0, persistent=False, update_problem=False)
                    lhs = {diff_neg: 1, fwd_id: 1/v0[r_id], bwd_id: 1/v0[r_id], 't_' + m_id: -1/t0[m_id]}
                    solver.add_constraint('c' + diff_neg, lhs.items(), '>', 0, persistent=False, update_problem=False)
                else:
                    lhs = {diff_pos: 1, fwd_id: -1/v0[r_id], bwd_id: -1/v0[r_id], 't_' + m_id: -1/t0[m_id]}
                    solver.add_constraint('c' + diff_pos, lhs.items(), '>', -2, persistent=False, update_problem=False)
                    lhs = {diff_neg: 1, fwd_id: 1/v0[r_id], bwd_id: 1/v0[r_id], 't_' + m_id: 1/t0[m_id]}
                    solver.add_constraint('c' + diff_neg, lhs.items(), '>', 2, persistent=False, update_problem=False)
            else:
                # reaction not in the mapping: its own id is used directly
                if kind == 1:
                    lhs = {diff_pos: 1, r_id: -1/v0[r_id], 't_' + m_id: 1/t0[m_id]}
                    solver.add_constraint('c' + diff_pos, lhs.items(), '>', 0, persistent=False, update_problem=False)
                    lhs = {diff_neg: 1, r_id: 1/v0[r_id], 't_' + m_id: -1/t0[m_id]}
                    solver.add_constraint('c' + diff_neg, lhs.items(), '>', 0, persistent=False, update_problem=False)
                else:
                    lhs = {diff_pos: 1, r_id: -1/v0[r_id], 't_' + m_id: -1/t0[m_id]}
                    solver.add_constraint('c' + diff_pos, lhs.items(), '>', -2, persistent=False, update_problem=False)
                    lhs = {diff_neg: 1, r_id: 1/v0[r_id], 't_' + m_id: 1/t0[m_id]}
                    solver.add_constraint('c' + diff_neg, lhs.items(), '>', 2, persistent=False, update_problem=False)

    solver.update()

    # --- objective: coefficient -1 on every flux, -weight on each deviation ---
    # NOTE(review): the negative coefficients suggest solve_lp maximizes by
    # default, i.e. this minimizes total flux plus weighted deviations -- confirm.
    objective = {r_id: -1 for r_id in model_irrev.reactions}

    for (m_id, r_id) in model.regulation.keys():
        # interactions without an entry in `weights` do not enter the objective
        if (m_id, r_id) in weights and v0[r_id] > TOL and t0[m_id] > TOL:
            diff_pos = 'd+_{}_{}'.format(m_id, r_id)
            diff_neg = 'd-_{}_{}'.format(m_id, r_id)
            objective[diff_pos] = -weights[(m_id, r_id)]
            objective[diff_neg] = -weights[(m_id, r_id)]

    solution = solver.solve_lp(objective)

    if solution.status == Status.OPTIMAL:
        # map the split-model solution back onto the original model's reactions
        v = merge_fluxes(model, mapping, solution.values)
    else:
        v = None

    return v
def gapFill(model, universe, constraints=None, min_growth=0.1, scores=None, inplace=True, bigM=1e3, abstol=1e-9,
            solver=None, tag=None):
    """ Gap Fill a metabolic model by adding reactions from a reaction universe

    Args:
        model (CBModel): original model
        universe (CBModel): universe model
        constraints (dict): additional constraints (optional)
        min_growth (float): minimum growth rate (default: 0.1)
        scores (dict): reaction scores (optional, see notes)
        inplace (bool): modify given model in place (default: True)
        bigM (float): maximal reaction flux (default: 1000)
        abstol (float): minimum threshold to consider a reaction active (default: 1e-9)
        solver (Solver): solver instance (optional)
        tag (str): add a metadata tag to gapfilled reactions (optional)

    Returns:
        CBModel: gap filled model (if inplace=False)

    Raises:
        RuntimeError: if the optimization does not end with an optimal status

    Notes:
        Scores can be used to make some reactions more likely to be included.
        Every candidate reaction is penalized by 1/(1+score); unscored
        reactions use a score of 0, i.e. a penalty of 1.

        The MILP variables and constraints are added to a solver only once
        (tracked with the ``_gapfill_flag`` attribute), so a solver instance can
        be reused across calls.
    """
    candidates = set(universe.reactions) - set(model.reactions)

    model = merge_models(model, universe, inplace, tag=tag)

    # newly added exchange reactions get a lower bound of 0
    for r_id in candidates:
        if r_id.startswith('R_EX'):
            model.set_lower_bound(r_id, 0)

    solver = solver if solver else solver_instance(model)
    scores = scores if scores else {}

    first_use = not hasattr(solver, '_gapfill_flag')

    if first_use:
        solver._gapfill_flag = True

        # one binary indicator per candidate reaction
        for r_id in candidates:
            indicator = 'y_' + r_id
            solver.add_variable(indicator, 0, 1, vartype=VarType.BINARY, update_problem=False)

        solver.update()

        # flux of a candidate is tied to its indicator through bigM
        for r_id in candidates:
            indicator = 'y_' + r_id
            solver.add_constraint('lb_' + r_id, {r_id: 1, indicator: bigM}, '>', 0, update_problem=False)
            solver.add_constraint('ub_' + r_id, {r_id: 1, indicator: -bigM}, '<', 0, update_problem=False)

        growth_rxn = model.biomass_reaction
        solver.add_constraint('min_growth', {growth_rxn: 1}, '>', min_growth, update_problem=False)

        solver.update()

    penalties = {}
    for r_id in candidates:
        penalties['y_' + r_id] = 1.0 / (1.0 + scores.get(r_id, 0.0))

    solution = solver.solve(linear=penalties, minimize=True, constraints=constraints)

    if solution.status != Status.OPTIMAL:
        raise RuntimeError('Failed to gapfill model for medium {}'.format(tag))

    # candidates that carry no flux in the solution are dropped again
    unused = [r_id for r_id in candidates if abs(solution.values[r_id]) < abstol]
    model.remove_reactions(unused)

    orphans = disconnected_metabolites(model)
    model.remove_metabolites(orphans)

    if inplace:
        return None
    return model
def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM=1e3, default_score=-1,
                     uptake_score=0, soft_score=1, soft_constraints=None, hard_constraints=None,
                     solver=None, debug_output=None):
    """ Apply minmax reduction algorithm (MILP).

    Computes a binary reaction vector that optimizes the agreement with reaction scores (maximizes positive scores,
    and minimizes negative scores). It generates a fully connected reaction network (i.e. all reactions must be able
    to carry some flux).

    Args:
        model (CBModel): universal model
        scores (dict): reaction scores (the given dict is not modified)
        min_growth (float): minimal growth constraint
        min_atpm (float): minimal maintenance ATP constraint
        eps (float): minimal flux required to consider leaving the reaction in the model
        bigM (float): maximal reaction flux
        default_score (float): penalty score for reactions without an annotation score (default: -1.0).
        uptake_score (float): penalty score for using uptake reactions (default: 0.0).
        soft_score (float): score for soft constraints (default: 1.0)
        soft_constraints (dict): dictionary from reaction id to expected flux direction (-1, 1, 0)
        hard_constraints (dict): dictionary of flux bounds
        solver (Solver): solver instance (optional)
        debug_output (str): if given, the problem is written to ``<debug_output>_milp_problem.lp`` (optional)

    Returns:
        Solution: optimization result

    Notes:
        Variables and constraints are added to a solver only once (tracked with
        the ``_carveme_flag`` attribute, together with the ``neg_vars`` and
        ``pos_vars`` lists stored on the solver); later calls with the same
        solver only replace the objective.
    """

    if not solver:
        solver = solver_instance(model)

    objective = {}

    scores = scores.copy()
    reactions = list(scores.keys())
    # Set mirror of `reactions`: O(1) membership tests instead of scanning the
    # list once per reaction (the list is kept because its order matters).
    known = set(reactions)

    if not soft_constraints:
        soft_constraints = {}

    for r_id in soft_constraints:
        if r_id not in known:
            reactions.append(r_id)
            known.add(r_id)

    if hard_constraints:
        solver.set_bounds(hard_constraints)

    # Add default score
    if default_score != 0:
        for r_id in model.reactions:
            if r_id not in known and not r_id.startswith('R_EX') and r_id != 'R_ATPM':
                scores[r_id] = default_score
                reactions.append(r_id)
                known.add(r_id)

    if not hasattr(solver, '_carveme_flag'):
        solver._carveme_flag = True

        biomass = model.biomass_reaction
        solver.add_constraint('min_growth', {biomass: 1}, '>', min_growth, update_problem=False)
        solver.add_constraint('min_atpm', {'R_ATPM': 1}, '>', min_atpm, update_problem=False)

        solver.neg_vars = []
        solver.pos_vars = []

        # yr_/yf_ indicators exist only for directions allowed by the bounds
        for r_id in reactions:
            reaction = model.reactions[r_id]
            if reaction.lb is None or reaction.lb < 0:
                y_r = 'yr_' + r_id
                solver.add_variable(y_r, 0, 1, vartype=VarType.BINARY, update_problem=False)
                solver.neg_vars.append(y_r)
            if reaction.ub is None or reaction.ub > 0:
                y_f = 'yf_' + r_id
                solver.add_variable(y_f, 0, 1, vartype=VarType.BINARY, update_problem=False)
                solver.pos_vars.append(y_f)

        if uptake_score != 0:
            for r_id in model.reactions:
                if r_id.startswith('R_EX'):
                    solver.add_variable('y_' + r_id, 0, 1, vartype=VarType.BINARY, update_problem=False)

        solver.update()

        neg_vars = set(solver.neg_vars)
        pos_vars = set(solver.pos_vars)

        for r_id in reactions:
            y_r, y_f = 'yr_' + r_id, 'yf_' + r_id
            has_reverse = y_r in neg_vars
            has_forward = y_f in pos_vars

            if has_reverse and has_forward:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_f: -eps, y_r: bigM}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_f: -bigM, y_r: eps}, '<', 0, update_problem=False)
                solver.add_constraint('rev_' + r_id, {y_f: 1, y_r: 1}, '<', 1, update_problem=False)
            elif has_forward:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_f: -eps}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_f: -bigM}, '<', 0, update_problem=False)
            elif has_reverse:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_r: bigM}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_r: eps}, '<', 0, update_problem=False)

        if uptake_score != 0:
            for r_id in model.reactions:
                if r_id.startswith('R_EX'):
                    solver.add_constraint('lb_' + r_id, {r_id: 1, 'y_' + r_id: bigM}, '>', 0, update_problem=False)

        solver.update()

    # Rebuilt here because a reused solver skips the block above.
    neg_vars = set(solver.neg_vars)
    pos_vars = set(solver.pos_vars)

    for r_id in reactions:
        y_r, y_f = 'yr_' + r_id, 'yf_' + r_id

        if r_id in soft_constraints:
            sign = soft_constraints[r_id]

            if sign > 0:
                w_f, w_r = soft_score, 0
            elif sign < 0:
                w_f, w_r = 0, soft_score
            else:
                w_f, w_r = -soft_score, -soft_score

        # a soft-constraint weight overrides the annotation score
        if y_f in pos_vars:
            if r_id in scores:
                objective[y_f] = scores[r_id]
            if r_id in soft_constraints:
                objective[y_f] = w_f

        if y_r in neg_vars:
            if r_id in scores:
                objective[y_r] = scores[r_id]
            if r_id in soft_constraints:
                objective[y_r] = w_r

    if uptake_score != 0:
        for r_id in model.reactions:
            if r_id.startswith('R_EX') and r_id not in soft_constraints:
                objective['y_' + r_id] = uptake_score

    solver.set_objective(linear=objective, minimize=False)

    if debug_output:
        solver.write_to_file(debug_output + "_milp_problem.lp")

    solution = solver.solve()

    return solution
def metabolite_production_score(community, environment=None, abstol=1e-3, exclude=None):  # TODO: implement excluded
    """
    Discover metabolites which species can produce in community

    Zelezniak A. et al, Metabolic dependencies drive species co-occurrence in diverse microbial communities (PNAS 2015)

    Args:
        community (Community): community object
        environment (Environment): metabolic environment applied to the merged community model (optional)
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-3)
        exclude: currently unused

    Returns:
        dict: Keys are model names, values are dictionaries from compound to 1 (can be produced) or 0 (can not)

    Notes:
        The merged community model is modified in place: the environment (if any)
        is applied to it and unbounded exchange upper limits are set to 1000.
    """
    merged = community.merged

    if environment:
        environment.apply(merged, inplace=True, warning=False)
        env_compounds = environment.get_compounds(format_str="\'{}\'[5:-5]")
    else:
        env_compounds = set()

    # make every organism exchange reaction bounded from above
    for org_exchanges in community.organisms_exchange_reactions.values():
        for r_id in org_exchanges.keys():
            reaction = merged.reactions[r_id]
            if reaction.ub is None:
                reaction.ub = 1000

    solver = solver_instance(merged)

    scores = {}

    for org_id, org_exchanges in community.organisms_exchange_reactions.items():
        org_scores = scores[org_id] = {}

        # compounds already supplied by the environment are not scored
        pending = []
        for r_id, cnm in org_exchanges.items():
            if cnm.original_metabolite not in env_compounds:
                pending.append(r_id)

        # repeatedly maximize the sum of the still unresolved exchanges;
        # stop as soon as a round resolves nothing new
        while pending:
            sol = solver.solve(linear={r_id: 1 for r_id in pending}, minimize=False, get_values=pending)

            if sol.status != Status.OPTIMAL:
                break

            still_blocked = [r_id for r_id in pending if sol.values[r_id] < abstol]

            if len(still_blocked) == len(pending):
                break

            for r_id in pending:
                if sol.values[r_id] >= abstol:
                    org_scores[org_exchanges[r_id].original_metabolite] = 1

            pending = still_blocked

        # whatever is left could not be produced
        for r_id in pending:
            org_scores[org_exchanges[r_id].original_metabolite] = 0

    return scores
def GIMME(model, gene_exp, cutoff=25, growth_frac=0.9, constraints=None, parsimonious=False):
    """ Run a GIMME simulation (Becker and Palsson, 2008).

    Arguments:
        model (CBModel): model
        gene_exp (dict): transcriptomics data
        cutoff (int): percentile cutoff (default: 25)
        growth_frac (float): minimum growth requirement (default: 0.9)
        constraints (dict): additional constraints (the given dict is not modified)
        parsimonious (bool): compute a parsimonious solution (default: False)

    Returns:
        Solution: solution (with parsimonious=True the first-stage solution is
        attached as ``solution.pre_solution``)
    """

    rxn_exp = gene_to_reaction_expression(model, gene_exp, or_func=max)
    threshold = percentile(list(rxn_exp.values()), cutoff)
    # penalty = distance below the threshold; reactions at/above it are free
    coeffs = {r_id: threshold - val for r_id, val in rxn_exp.items() if val < threshold}

    solver = solver_instance(model)

    wt_solution = FBA(model, constraints=constraints, solver=solver)

    # Work on a copy: the biomass bound added below must not leak into the
    # caller's dictionary.
    constraints = dict(constraints) if constraints else {}

    biomass = model.biomass_reaction
    constraints[biomass] = (growth_frac * wt_solution.values[biomass], None)

    # Reversible reactions get two non-negative helper variables so that the
    # objective can penalize flux in either direction.
    for r_id in model.reactions:
        if model.reactions[r_id].reversible:
            pos, neg = r_id + '+', r_id + '-'
            solver.add_variable(pos, 0, None, persistent=False, update_problem=False)
            solver.add_variable(neg, 0, None, persistent=False, update_problem=False)
    solver.update()

    for r_id in model.reactions:
        if model.reactions[r_id].reversible:
            pos, neg = r_id + '+', r_id + '-'
            solver.add_constraint('c' + pos, {r_id: -1, pos: 1}, '>', 0, persistent=False, update_problem=False)
            solver.add_constraint('c' + neg, {r_id: 1, neg: 1}, '>', 0, persistent=False, update_problem=False)
    solver.update()

    objective = dict()
    for r_id, val in coeffs.items():
        if model.reactions[r_id].reversible:
            pos, neg = r_id + '+', r_id + '-'
            objective[pos] = val
            objective[neg] = val
        else:
            objective[r_id] = val

    solution = solver.solve(objective, minimize=True, constraints=constraints)

    if parsimonious:
        pre_solution = solution

        # fix the first-stage objective value, then minimize total flux
        solver.add_constraint('obj', objective, '=', pre_solution.fobj)
        objective = dict()

        for r_id in model.reactions:
            if model.reactions[r_id].reversible:
                pos, neg = r_id + '+', r_id + '-'
                objective[pos] = 1
                objective[neg] = 1
            else:
                objective[r_id] = 1

        solution = solver.solve(objective, minimize=True, constraints=constraints)
        solver.remove_constraint('obj')
        solution.pre_solution = pre_solution

    return solution
def GIMME(model, gene_exp, cutoff=25, growth_frac=0.9, constraints=None, parsimonious=False):
    """ Run a GIMME simulation (Becker and Palsson, 2008).

    Arguments:
        model (CBModel): model
        gene_exp (dict): transcriptomics data
        cutoff (int): percentile cutoff (default: 25)
        growth_frac (float): minimum growth requirement (default: 0.9)
        constraints (dict): additional constraints (the given dict is not modified)
        parsimonious (bool): compute a parsimonious solution (default: False)

    Returns:
        Solution: solution (with parsimonious=True the first-stage solution is
        attached as ``solution.pre_solution``)
    """

    rxn_exp = gene_to_reaction_expression(model, gene_exp, or_func=max)
    # A dict view is not a sequence on Python 3; materialize it before handing
    # it to percentile().
    threshold = percentile(list(rxn_exp.values()), cutoff)
    # penalty = distance below the threshold; reactions at/above it are free
    coeffs = {r_id: threshold - val for r_id, val in rxn_exp.items() if val < threshold}

    solver = solver_instance(model)

    wt_solution = FBA(model, constraints=constraints, solver=solver)

    # Work on a copy: the biomass bound added below must not leak into the
    # caller's dictionary.
    constraints = dict(constraints) if constraints else {}

    biomass = model.biomass_reaction
    constraints[biomass] = (growth_frac * wt_solution.values[biomass], None)

    reversible = [r_id for r_id in model.reactions if model.reactions[r_id].reversible]

    # Reversible reactions get two non-negative helper variables so that the
    # objective can penalize flux in either direction.
    for r_id in reversible:
        pos, neg = r_id + '+', r_id + '-'
        solver.add_variable(pos, 0, None, persistent=False, update_problem=False)
        solver.add_variable(neg, 0, None, persistent=False, update_problem=False)
    solver.update()

    for r_id in reversible:
        pos, neg = r_id + '+', r_id + '-'
        solver.add_constraint('c' + pos, {r_id: -1, pos: 1}, '>', 0, persistent=False, update_problem=False)
        solver.add_constraint('c' + neg, {r_id: 1, neg: 1}, '>', 0, persistent=False, update_problem=False)
    solver.update()

    objective = dict()
    for r_id, val in coeffs.items():
        if model.reactions[r_id].reversible:
            pos, neg = r_id + '+', r_id + '-'
            objective[pos] = val
            objective[neg] = val
        else:
            objective[r_id] = val

    solution = solver.solve(objective, minimize=True, constraints=constraints)

    if parsimonious:
        pre_solution = solution

        # fix the first-stage objective value, then minimize total flux
        solver.add_constraint('obj', objective, '=', pre_solution.fobj)
        objective = dict()

        for r_id in model.reactions:
            if model.reactions[r_id].reversible:
                pos, neg = r_id + '+', r_id + '-'
                objective[pos] = 1
                objective[neg] = 1
            else:
                objective[r_id] = 1

        solution = solver.solve(objective, minimize=True, constraints=constraints)
        solver.remove_constraint('obj')
        solution.pre_solution = pre_solution

    return solution
def marge(model, rel_expression, transformed=False, constraints_a=None, constraints_b=None, rel_constraints=None,
          growth_frac_a=0.0, growth_frac_b=0.0, gene_prefix='G_', pseudo_genes=None):
    """ Metabolic Analysis with Relative Gene Expression (MARGE)

    Builds a single problem holding two copies of the model (conditions A and
    B) and solves it in two steps: first minimizing the deviation variables
    attached to each gene in *rel_expression*, then minimizing the sum of the
    ``u_reactions`` fluxes while keeping each deviation within a small
    tolerance of its step 1 value.

    Args:
        model (CBModel): organism model
        rel_expression (dict): relative gene expression (condition B / condition A)
        transformed (bool): True if the model is already in extended GPR format (default: False)
        constraints_a (dict): additional constraints to use for condition A (optional)
        constraints_b (dict): additional constraints to use for condition B (optional)
        rel_constraints (dict): relative constraints between conditions (such as flux ratios) (optional)
        growth_frac_a (float): minimum growth rate in condition A (default: 0.0)
        growth_frac_b (float): minimum growth rate in condition B (default: 0.0)
        gene_prefix (str): prefix used in gene identifiers (default: 'G_')
        pseudo_genes (list): pseudo-genes in model to ignore (e.g: 'spontaneous') (optional)

    Returns:
        dict: fluxes in condition A
        dict: fluxes in condition B
        Solution: solver solution for step 1 (minimize flux changes)
        Solution: solver solution for step 2 (minimize absolute fluxes)

    Notes:
        On failure a message is printed and the flux dicts are None:
        (None, None, None, None) if a reference FBA fails,
        (None, None, solution1, None) if step 1 fails and
        (None, None, solution1, solution2) if step 2 fails.
    """

    if not transformed:
        model = gpr_transform(model, inplace=False, gene_prefix=gene_prefix, pseudo_genes=pseudo_genes)

    # User constraints are converted by the (transformed) model before use.
    if constraints_a is None:
        constraints_a = {}
    else:
        constraints_a = model.convert_constraints(constraints_a)

    if constraints_b is None:
        constraints_b = {}
    else:
        constraints_b = model.convert_constraints(constraints_b)

    if rel_constraints is None:
        rel_constraints = {}

    # Optional minimum growth: a fraction of the FBA objective value of each condition.
    if growth_frac_a > 0:
        biomass = model.biomass_reaction
        sol_a = FBA(model, constraints=constraints_a)
        if sol_a.status != Status.OPTIMAL:
            print('Failed to solve reference model for condition A.')
            return None, None, None, None
        constraints_a[biomass] = (sol_a.fobj * growth_frac_a, None)

    if growth_frac_b > 0:
        biomass = model.biomass_reaction
        sol_b = FBA(model, constraints=constraints_b)
        if sol_b.status != Status.OPTIMAL:
            print('Failed to solve reference model for condition B.')
            return None, None, None, None
        constraints_b[biomass] = (sol_b.fobj * growth_frac_b, None)

    # The problem is assembled by hand on an empty solver instance.
    solver = solver_instance()

    # Every reaction appears twice ('_a' / '_b'), bounded by the respective
    # condition's constraints or, failing that, by the model bounds.
    for r_id, reaction in model.reactions.items():
        lb_a, ub_a = constraints_a.get(r_id, (reaction.lb, reaction.ub))
        solver.add_variable(r_id + '_a', lb_a, ub_a, update_problem=False)

        lb_b, ub_b = constraints_b.get(r_id, (reaction.lb, reaction.ub))
        solver.add_variable(r_id + '_b', lb_b, ub_b, update_problem=False)

    # Non-negative deviation variables per gene.
    for g_id, val in rel_expression.items():
        solver.add_variable(g_id + '_+', 0, None, update_problem=False)
        solver.add_variable(g_id + '_-', 0, None, update_problem=False)

    solver.update()

    table = model.metabolite_reaction_lookup()

    # One stoichiometric constraint per metabolite and condition
    # (relation/rhs left at the solver's defaults).
    for m_id in model.metabolites:
        stoich_a = {r_id + '_a': val for r_id, val in table[m_id].items()}
        solver.add_constraint(m_id + '_a', stoich_a, update_problem=False)

        stoich_b = {r_id + '_b': val for r_id, val in table[m_id].items()}
        solver.add_constraint(m_id + '_b', stoich_b, update_problem=False)

    # Relative constraints: expr_b - ratio * expr_a (solver defaults for relation/rhs).
    for r_id, ratio in rel_constraints.items():
        constr = {}
        expr_a = model.convert_id_to_expr(r_id, -ratio)
        expr_b = model.convert_id_to_expr(r_id, 1)
        constr.update({r_id2 + '_a': val for r_id2, val in expr_a.items()})
        constr.update({r_id2 + '_b': val for r_id2, val in expr_b.items()})
        solver.add_constraint(r_id + '_rel', constr, update_problem=False)

    # g_+ '>' u_b - val * u_a and g_- '>' val * u_a - u_b, where u_<gene> is the
    # variable named after the gene id without its prefix.
    for g_id, val in rel_expression.items():
        u_id_a = 'u_' + g_id[len(gene_prefix):] + '_a'
        u_id_b = 'u_' + g_id[len(gene_prefix):] + '_b'
        solver.add_constraint(g_id + '_c+', {g_id + '_+': 1, u_id_b: -1, u_id_a: val}, '>', 0, update_problem=False)
        solver.add_constraint(g_id + '_c-', {g_id + '_-': 1, u_id_b: 1, u_id_a: -val}, '>', 0, update_problem=False)

    solver.update()

    # Step 1: minimize the sum of all deviation variables.
    objective1 = {}
    for g_id in rel_expression.keys():
        objective1[g_id + '_+'] = 1
        objective1[g_id + '_-'] = 1

    solution1 = solver.solve(objective1, minimize=True)

    if solution1.status != Status.OPTIMAL:
        print('Failed to solve first problem.')
        return None, None, solution1, None

    # Step 2 keeps every deviation at its step 1 value (plus a small tolerance).
    opt_tol = 1e-6

    for g_id, val in rel_expression.items():
        solver.add_constraint(g_id + '_o+', {g_id + '_+': 1}, '<', solution1.values[g_id + '_+'] + opt_tol, update_problem=False)
        solver.add_constraint(g_id + '_o-', {g_id + '_-': 1}, '<', solution1.values[g_id + '_-'] + opt_tol, update_problem=False)

    solver.update()

    # Step 2: minimize the sum of the u_reactions fluxes in both conditions.
    objective2 = {}
    for r_id in model.u_reactions:
        objective2[r_id + '_a'] = 1
        objective2[r_id + '_b'] = 1

    solution2 = solver.solve(objective2, minimize=True)

    if solution2.status != Status.OPTIMAL:
        print('Failed to solve second problem.')
        return None, None, solution1, solution2

    # Split the joint solution per condition and let the model convert the fluxes.
    fluxes_a = {r_id: solution2.values[r_id + '_a'] for r_id in model.reactions}
    fluxes_b = {r_id: solution2.values[r_id + '_b'] for r_id in model.reactions}

    fluxes_a = model.convert_fluxes(fluxes_a)
    fluxes_b = model.convert_fluxes(fluxes_b)

    return fluxes_a, fluxes_b, solution1, solution2
def minimal_medium(model, exchange_reactions=None, direction=-1, min_mass_weight=False, min_growth=1,
                   max_uptake=100, max_compounds=None, n_solutions=1, validate=True, abstol=1e-6,
                   warnings=True, use_pool=False, pool_gap=None, solver=None):
    """ Minimal medium calculator. Determines the minimum number of medium components for the organism to grow.

    Notes:
        There are two options provided:
            * simply minimize the total number of components
            * minimize nutrients by molecular weight (as implemented by Zarecki et al, 2014)

        When *solver* is supplied, every variable and constraint created here is added with
        ``persistent=False`` and ``solver.clean_up()`` is called before returning.

    Args:
        model (CBModel): model
        exchange_reactions: list of exchange reactions (if not provided all model exchange reactions are used)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 1)
        max_uptake (float): maximum uptake rate (default: 100)
        max_compounds (int): limit maximum number of compounds (optional)
        n_solutions (int): enumerate multiple solutions (default: 1)
        validate (bool): validate solution using FBA (for debugging purposes, default: True);
            only applied when n_solutions == 1
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-6)
        warnings (bool): emit warnings through warnings.warn (default: True)
        use_pool (bool): when n_solutions > 1, request all solutions from a single solve
            (pool_size=n_solutions) instead of iterating with exclusion constraints (default: False)
        pool_gap (float): forwarded to the solver as *pool_gap* when use_pool is set (optional)
        solver (Solver): solver instance to reuse (optional, a new one is created from the model otherwise)

    Returns:
        n_solutions == 1: (medium, solution), or (None, solution) if no optimal solution was found
        n_solutions > 1: (list of media, list of solutions); both lists are empty when the pool
            solve returns None
    """

    def warn_wrapper(message):
        if warnings:
            warn(message)

    if exchange_reactions is None:
        exchange_reactions = list(model.get_exchange_reactions())

    if not solver:
        solver = solver_instance(model)
        persistent = True
    else:
        persistent = False

    solver.set_lower_bounds({model.biomass_reaction: min_growth})

    # One binary indicator per exchange reaction: y = 1 allows the uptake.
    for r_id in exchange_reactions:
        solver.add_variable('y_' + r_id, 0, 1, vartype=VarType.BINARY, update_problem=False,
                            persistent=persistent)

    solver.update()

    # Couple each exchange flux to its indicator (flux is forced to zero on the uptake side when y = 0).
    for r_id in exchange_reactions:
        if direction < 0:
            solver.add_constraint('c_' + r_id, {r_id: 1, 'y_' + r_id: max_uptake}, '>', 0,
                                  update_problem=False, persistent=persistent)
        else:
            solver.add_constraint('c_' + r_id, {r_id: 1, 'y_' + r_id: -max_uptake}, '<', 0,
                                  update_problem=False, persistent=persistent)

    if max_compounds:
        lhs = {'y_' + r_id: 1 for r_id in exchange_reactions}
        solver.add_constraint('max_cmpds', lhs, '<', max_compounds, update_problem=False,
                              persistent=persistent)

    solver.update()

    if min_mass_weight:
        # Weight each indicator by the molecular weight of the exchanged compound;
        # reactions that cannot be weighted are left out of the objective.
        objective = {}

        for r_id in exchange_reactions:

            if direction < 0:
                compounds = model.reactions[r_id].get_substrates()
            else:
                compounds = model.reactions[r_id].get_products()

            if len(compounds) > 1:
                warn_wrapper('Multiple compounds in exchange reaction (ignored)')
                continue

            if len(compounds) == 0:
                warn_wrapper('No compounds in exchange reaction (ignored)')
                continue

            metabolite = model.metabolites[compounds[0]]

            if 'FORMULA' not in metabolite.metadata:
                warn_wrapper('No formula for compound (ignored)')
                continue

            formulas = metabolite.metadata['FORMULA'].split(';')

            if len(formulas) > 1:
                warn_wrapper('Multiple formulas for compound')

            weight = molecular_weight(formulas[0])

            objective['y_' + r_id] = weight

    else:
        objective = {'y_' + r_id: 1 for r_id in exchange_reactions}

    result, ret_sols = None, None

    # Temporary bounds opening the uptake side of every candidate exchange reaction.
    if direction < 0:
        constraints = {r_id: (-max_uptake, model.reactions[r_id].ub) for r_id in exchange_reactions}
    else:
        constraints = {r_id: (model.reactions[r_id].lb, max_uptake) for r_id in exchange_reactions}

    if n_solutions == 1:

        solution = solver.solve(objective, minimize=True, constraints=constraints,
                                get_values=exchange_reactions)

        if solution.status != Status.OPTIMAL:
            warn_wrapper('No solution found')
            result, ret_sols = None, solution
        else:
            medium = get_medium(solution, exchange_reactions, direction, abstol)

            if validate:
                validate_solution(model, medium, exchange_reactions, direction, min_growth, max_uptake)

            result, ret_sols = medium, solution

    elif use_pool:
        solutions = solver.solve(objective, minimize=True, constraints=constraints,
                                 get_values=exchange_reactions, pool_size=n_solutions, pool_gap=pool_gap)

        if solutions is None:
            result, ret_sols = [], []
        else:
            media = [get_medium(solution, exchange_reactions, direction, abstol)
                     for solution in solutions]
            result, ret_sols = media, solutions
    else:
        media = []
        solutions = []

        for i in range(0, n_solutions):
            if i > 0:
                # Exclude the previously found medium (at least one of its compounds must be dropped).
                constr_id = 'iteration_{}'.format(i)
                previous_sol = {'y_' + r_id: 1 for r_id in medium}
                # Fix: pass `persistent` like every other constraint in this function, so that the
                # cuts are handled the same way as the 'y_' variables they refer to when the solver
                # is shared between calls.
                # NOTE(review): assumes add_constraint defaults to persistent=True, so behaviour
                # with a locally created solver is unchanged -- confirm against the Solver API.
                solver.add_constraint(constr_id, previous_sol, '<', len(previous_sol) - 1,
                                      persistent=persistent)

            solution = solver.solve(objective, minimize=True, constraints=constraints,
                                    get_values=exchange_reactions)

            if solution.status != Status.OPTIMAL:
                break

            medium = get_medium(solution, exchange_reactions, direction, abstol)
            media.append(medium)
            solutions.append(solution)

        result, ret_sols = media, solutions

    if not persistent:
        solver.clean_up()

    return result, ret_sols
def species_coupling_score(community, environment=None, min_growth=0.1, n_solutions=100, verbose=True,
                           abstol=1e-6, use_pool=False):
    """ Calculate frequency of community species dependency on each other

    Zelezniak A. et al, Metabolic dependencies drive species co-occurrence in diverse microbial communities (PNAS 2015)

    Args:
        community (Community): microbial community
        environment (Environment): metabolic environment (optional)
        min_growth (float): minimum growth rate (default: 0.1)
        n_solutions (int): number of alternative solutions to calculate (default: 100)
        verbose (bool): warn when no solution is found for an organism (default: True)
        abstol (float): tolerance for detecting a non-zero indicator value (default: 1e-6)
        use_pool (bool): obtain the alternative solutions from a single pool solve instead of
            iterating with exclusion constraints (default: False)

    Returns:
        dict: Keys are dependent organisms, values are dictionaries with required organism frequencies
            (None for organisms for which no solution was found)
    """

    # Work on an interacting copy so that the caller's community is left untouched.
    community = community.copy(copy_models=False, interacting=True, create_biomass=False,
                               merge_extracellular_compartments=False)

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)

    biomass_reactions = community.organisms_biomass_reactions

    for b in biomass_reactions.values():
        community.merged.reactions[b].lb = 0

    solver = solver_instance(community.merged)

    # One binary "organism is active" indicator per organism.
    for org_id in community.organisms_reactions:
        solver.add_variable('y_{}'.format(org_id), 0, 1, vartype=VarType.BINARY, update_problem=False)

    solver.update()

    # Every non-biomass reaction of an organism may only carry flux when its indicator is on.
    bigM = 100
    for org_id, rxns in community.organisms_reactions.items():
        org_var = 'y_{}'.format(org_id)
        for r_id in rxns:
            if r_id == biomass_reactions[org_id]:
                continue
            solver.add_constraint('c_{}_lb'.format(r_id), {r_id: 1, org_var: bigM}, '>', 0,
                                  update_problem=False)
            solver.add_constraint('c_{}_ub'.format(r_id), {r_id: 1, org_var: -bigM}, '<', 0,
                                  update_problem=False)

    solver.update()

    scores = {}

    for org_id, biomass_id in biomass_reactions.items():
        other = {o for o in community.organisms if o != org_id}
        solver.add_constraint('SMETANA_Biomass', {biomass_id: 1}, '>', min_growth)
        # Minimize the number of other organisms that have to be active.
        objective = {"y_{}".format(o): 1.0 for o in other}

        if not use_pool:
            previous_constraints = []
            donors_list = []
            failed = False

            for i in range(n_solutions):
                sol = solver.solve(objective, minimize=True, get_values=objective.keys())

                if sol.status != Status.OPTIMAL:
                    # Only a failure on the very first solve means the organism cannot grow.
                    failed = i == 0
                    break

                donors = [o for o in other if sol.values["y_{}".format(o)] > abstol]
                donors_list.append(donors)

                # Exclude this donor set from the following iterations.
                previous_con = 'iteration_{}'.format(i)
                previous_constraints.append(previous_con)
                previous_sol = {"y_{}".format(o): 1 for o in donors}
                solver.add_constraint(previous_con, previous_sol, '<', len(previous_sol) - 1)

            solver.remove_constraints(['SMETANA_Biomass'] + previous_constraints)

            if not failed:
                donors_list_n = float(len(donors_list))
                donors_counter = Counter(chain(*donors_list))
                scores[org_id] = {o: donors_counter[o] / donors_list_n for o in other}
            else:
                if verbose:
                    warn('SCS: Failed to find a solution for growth of ' + org_id)
                scores[org_id] = None

        else:
            sols = solver.solve(objective, minimize=True, get_values=objective.keys(),
                                pool_size=n_solutions, pool_gap=0.1)
            solver.remove_constraint('SMETANA_Biomass')

            # Fix: a pool solve may hand back None (minimal_medium guards for that case) as well
            # as an empty list; `len(None)` used to raise TypeError here.
            if not sols:
                scores[org_id] = None
                if verbose:
                    warn('SCS: Failed to find a solution for growth of ' + org_id)
            else:
                donor_count = Counter(o for sol in sols for o in other
                                      if sol.values["y_{}".format(o)] > abstol)
                n_sols = float(len(sols))
                scores[org_id] = {o: donor_count[o] / n_sols for o in other}

    return scores
def mro_score(community, environment=None, direction=-1, min_mass_weight=False, min_growth=0.1, max_uptake=10,
              validate=False, verbose=True, exclude=None):
    """ Implements the metabolic resource overlap (MRO) score as defined in (Zelezniak et al, 2015).

    Notes:
        *environment* (when given) and the non-interacting minimal medium are applied in place
        to ``community.merged``, and ``community.merged.biomass_reaction`` is reassigned for
        every organism.

    Args:
        community (Community): microbial community model
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        validate (bool): validate minimal media using FBA (default: False)
        verbose (bool): emit warnings (default: True)
        exclude (iterable): compound ids to ignore; each id ``x`` is matched as ``'M_x_e'``
            against the metabolites of the individual media (optional)

    Returns:
        float: MRO score (None when the score's denominator is zero)
        dict: extra information ('noninteracting_medium', 'individual_media', 'pairwise', 'solutions')

        (None, None) is returned when any of the minimal medium problems has no optimal solution.
    """

    noninteracting = community.copy(copy_models=False, interacting=False, create_biomass=True)
    exch_reactions = set(community.merged.get_exchange_reactions())

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)
        environment.apply(noninteracting.merged, inplace=True, warning=False)
        exch_reactions &= set(environment)

    noninteracting_medium, sol = minimal_medium(noninteracting.merged, exchange_reactions=exch_reactions,
                                                direction=direction, min_mass_weight=min_mass_weight,
                                                min_growth=min_growth, max_uptake=max_uptake,
                                                validate=validate, warnings=verbose)

    solutions = [sol]

    if sol.status != Status.OPTIMAL:
        if verbose:
            warn('MRO: Failed to find a valid solution for non-interacting community')
        return None, None

    # The abiotic environment is limited to the minimal medium of the non-interacting community.
    noninteracting_exch = set(noninteracting_medium)
    noninteracting_env = Environment.from_reactions(noninteracting_exch, max_uptake=max_uptake)
    noninteracting_env.apply(community.merged, inplace=True)

    individual_media = {}

    # Fix: this used to fall back to `{}` (a dict), and `set - dict` raises TypeError, so the
    # default call (exclude=None) crashed as soon as the first individual medium was computed.
    if exclude is not None:
        excluded_metabolites = {'M_{}_e'.format(x) for x in exclude}
    else:
        excluded_metabolites = set()

    solver = solver_instance(community.merged)

    for org_id in community.organisms:
        biomass_reaction = community.organisms_biomass_reactions[org_id]
        community.merged.biomass_reaction = biomass_reaction

        org_noninteracting_exch = community.organisms_exchange_reactions[org_id]

        medium, sol = minimal_medium(community.merged, exchange_reactions=org_noninteracting_exch,
                                     direction=direction, min_mass_weight=min_mass_weight,
                                     min_growth=min_growth, max_uptake=max_uptake, validate=validate,
                                     solver=solver, warnings=verbose)
        solutions.append(sol)

        if sol.status != Status.OPTIMAL:
            # Fix: honour `verbose`, like the non-interacting failure above.
            if verbose:
                warn('MRO: Failed to find a valid solution for: ' + org_id)
            return None, None

        individual_media[org_id] = {org_noninteracting_exch[r].original_metabolite
                                    for r in medium} - excluded_metabolites

    # Compounds shared by each pair of organisms.
    pairwise = {(o1, o2): individual_media[o1] & individual_media[o2]
                for o1, o2 in combinations(community.organisms, 2)}

    numerator = len(individual_media) * sum(map(len, pairwise.values()))
    denominator = float(len(pairwise) * sum(map(len, individual_media.values())))

    score = numerator / denominator if denominator != 0 else None
    extras = {
        'noninteracting_medium': noninteracting_medium,
        'individual_media': individual_media,
        'pairwise': pairwise,
        'solutions': solutions
    }

    return score, extras
def metabolite_production_score(community, environment=None, max_uptake=100, min_growth=1.0, abstol=1e-6):
    """ Discover metabolites which species can produce in community

    Zelezniak A. et al, Metabolic dependencies drive species co-occurrence in diverse microbial communities (PNAS 2015)

    Args:
        community (Community): community object
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        max_uptake (float): maximum uptake rate (default: 100)
        min_growth (float): minimum growth rate (default: 1)
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-6)

    Returns:
        dict: Keys are model names, values are sets of produced compounds (None when the first
            optimization for that organism has no optimal solution)
        dict: Extra information (currently always empty)
    """
    interacting_community = community.copy(copy_models=False, interacting=True, create_biomass=False,
                                           merge_extracellular_compartments=False)
    if environment:
        environment.apply(interacting_community.merged, inplace=True)

    reactions = interacting_community.merged.reactions
    biomass_reactions = interacting_community.organisms_biomass_reactions

    # Fix: `.itervalues()` / `.iteritems()` / `xrange` only exist on Python 2; the rest of this
    # file already uses `.values()` / `.items()` / `range`, which behave the same on both.
    rxn2met = {ex.organism_reaction: ex.original_metabolite
               for org_exchanges in interacting_community.organisms_exchange_reactions.values()
               for ex in org_exchanges.values()}

    # Metabolites of every exchange reaction that is open for uptake.
    media_metabolites = set()
    for exch_id in interacting_community.merged.get_exchange_reactions():
        if exch_id not in reactions:
            continue
        lb = reactions[exch_id].lb
        # `None < 0` evaluated to True on Python 2 (unbounded uptake) but raises TypeError on
        # Python 3, so the unbounded case is spelled out explicitly.
        if lb is None or lb < 0:
            media_metabolites.update(reactions[exch_id].stoichiometry)

    solver = solver_instance(interacting_community.merged)

    # Binary constraints that force biomass production of any model that activates exchanges
    for org_id, exchanges in interacting_community.organisms_exchange_reactions.items():
        org_var = 'y_{}'.format(org_id)
        solver.add_variable(org_var, 0, 1, vartype=VarType.BINARY, update_problem=False)
        for r_id in exchanges:
            if r_id == biomass_reactions[org_id]:
                lb = min_growth
            else:
                lb = -max_uptake if reactions[r_id].lb is None else reactions[r_id].lb

            ub = max_uptake if reactions[r_id].ub is None else reactions[r_id].ub
            solver.add_constraint('c_{}_lb'.format(r_id), {r_id: 1, org_var: -lb}, '>', 0,
                                  update_problem=False)
            solver.add_constraint('c_{}_ub'.format(r_id), {r_id: 1, org_var: -ub}, '<', 0,
                                  update_problem=False)

    scores = {}

    # NOTE(review): this loop reads the exchange/biomass reactions of the *original* community,
    # while the solver was built from the interacting copy -- kept as in the original; confirm
    # that both expose the same reaction ids.
    for org_id, exchange_rxns in community.organisms_exchange_reactions.items():
        # Remove metabolites present in the medium from the list of product candidates
        exchange_rxns = {rxn_id for rxn_id, cnm in exchange_rxns.items()
                         if cnm.extracellular_metabolite not in media_metabolites}
        org_biomass = community.organisms_biomass_reactions[org_id]

        solver.add_constraint('SMETANA_Biomass', {org_biomass: 1}, '>', min_growth, update_problem=False)
        solver.update()

        # Repeatedly maximize the summed flux of the remaining candidates and retire the ones
        # that were produced, until no candidate is left or nothing new is produced.
        org_products = set()
        for i in range(30000):
            if not exchange_rxns:
                break

            objective = {r_id: 1.0 for r_id in exchange_rxns}
            solution = solver.solve(objective, minimize=False)

            if solution.status != Status.OPTIMAL:
                if i == 0:
                    org_products = None
                break

            i_products = {r_id for r_id in exchange_rxns if solution.values[r_id] > abstol}
            org_products = org_products.union(i_products)
            exchange_rxns = exchange_rxns - i_products

            if not i_products:
                break

        if org_products is not None:
            scores[org_id] = {rxn2met[r_id] for r_id in org_products}
        else:
            scores[org_id] = None

        solver.remove_constraint('SMETANA_Biomass')

    return scores, {}
def marge(model, rel_expression, transformed=False, constraints_a=None, constraints_b=None, rel_constraints=None,
          growth_frac_a=None, growth_frac_b=None, pseudo_genes=None, gene_prefix='G_'):
    """ Two-condition flux estimation from relative gene expression (two-step LP).

    A joint problem is built with one copy of every reaction for condition A (suffix '_a') and
    one for condition B (suffix '_b'). For every gene *g* in *rel_expression* two non-negative
    slack variables bound the deviation between ``u_g_b`` and ``val * u_g_a``. The first step
    minimizes the sum of these slacks; the second step keeps each slack below its first-step
    value (plus a tolerance of 1e-6) and minimizes the sum of the ``model.u_reactions``
    variables of both conditions.

    Args:
        model (CBModel): model (GPR-transformed if *transformed* is True)
        rel_expression (dict): gene id to relative expression value (condition B relative to A)
        transformed (bool): model is already GPR-transformed (default: False)
        constraints_a (dict): additional constraints for condition A (optional)
        constraints_b (dict): additional constraints for condition B (optional)
        rel_constraints (dict): reaction id to ratio; adds a constraint on
            ``flux_b - ratio * flux_a`` with the solver's default sense and right-hand side (optional)
        growth_frac_a (float): minimum growth in condition A as a fraction of its FBA optimum (optional)
        growth_frac_b (float): minimum growth in condition B as a fraction of its FBA optimum (optional)
        pseudo_genes (list): forwarded to gpr_transform (optional)
        gene_prefix (str): prefix of the gene ids in *rel_expression*; it is cut off (by length)
            to build the matching 'u_<gene>' variable name (default: 'G_')

    Returns:
        dict: fluxes for condition A (None on failure)
        dict: fluxes for condition B (None on failure)
        Solution: solution of the first problem (None if a reference FBA failed)
        Solution: solution of the second problem (None if it was not reached)
    """

    if not transformed:
        model = gpr_transform(model, inplace=False, pseudo_genes=pseudo_genes)

    if constraints_a is None:
        constraints_a = {}
    else:
        constraints_a = model.convert_constraints(constraints_a)

    if constraints_b is None:
        constraints_b = {}
    else:
        constraints_b = model.convert_constraints(constraints_b)

    if rel_constraints is None:
        rel_constraints = {}

    # Optionally require a fraction of the optimal growth rate in each condition.
    if growth_frac_a is not None:
        biomass = model.biomass_reaction
        sol_a = FBA(model, constraints=constraints_a)
        if sol_a.status != Status.OPTIMAL:
            print('Failed to solve reference model for condition A.')
            return None, None, None, None
        constraints_a[biomass] = (sol_a.fobj * growth_frac_a, None)

    if growth_frac_b is not None:
        biomass = model.biomass_reaction
        sol_b = FBA(model, constraints=constraints_b)
        if sol_b.status != Status.OPTIMAL:
            print('Failed to solve reference model for condition B.')
            return None, None, None, None
        constraints_b[biomass] = (sol_b.fobj * growth_frac_b, None)

    solver = solver_instance()

    # Variables: one flux per reaction and condition, two slacks per gene.
    for r_id, reaction in model.reactions.items():
        lb_a, ub_a = constraints_a.get(r_id, (reaction.lb, reaction.ub))
        solver.add_variable(r_id + '_a', lb_a, ub_a, update_problem=False)

        lb_b, ub_b = constraints_b.get(r_id, (reaction.lb, reaction.ub))
        solver.add_variable(r_id + '_b', lb_b, ub_b, update_problem=False)

    for g_id in rel_expression:
        solver.add_variable(g_id + '_+', 0, None, update_problem=False)
        solver.add_variable(g_id + '_-', 0, None, update_problem=False)

    solver.update()

    # Mass balance of every metabolite, separately for each condition.
    table = model.metabolite_reaction_lookup()

    for m_id in model.metabolites:
        stoich_a = {r_id + '_a': val for r_id, val in table[m_id].items()}
        solver.add_constraint(m_id + '_a', stoich_a, update_problem=False)

        stoich_b = {r_id + '_b': val for r_id, val in table[m_id].items()}
        solver.add_constraint(m_id + '_b', stoich_b, update_problem=False)

    for r_id, ratio in rel_constraints.items():
        constr = {}
        expr_a = model.convert_id_to_expr(r_id, -ratio)
        expr_b = model.convert_id_to_expr(r_id, 1)
        constr.update({r_id2 + '_a': val for r_id2, val in expr_a.items()})
        constr.update({r_id2 + '_b': val for r_id2, val in expr_b.items()})
        solver.add_constraint(r_id + '_rel', constr, update_problem=False)

    # Generalized: the prefix length used to be hard-coded as 2; the default 'G_' keeps that.
    prefix_len = len(gene_prefix)

    for g_id, val in rel_expression.items():
        gene = g_id[prefix_len:]
        u_id_a = 'u_' + gene + '_a'
        u_id_b = 'u_' + gene + '_b'
        # g+ >= u_b - val * u_a   and   g- >= val * u_a - u_b
        solver.add_constraint(g_id + '_c+', {g_id + '_+': 1, u_id_b: -1, u_id_a: val}, '>', 0,
                              update_problem=False)
        solver.add_constraint(g_id + '_c-', {g_id + '_-': 1, u_id_b: 1, u_id_a: -val}, '>', 0,
                              update_problem=False)

    solver.update()

    # Step 1: minimize the total deviation from the measured relative expression.
    objective1 = {}
    for g_id in rel_expression:
        objective1[g_id + '_+'] = 1
        objective1[g_id + '_-'] = 1

    solution1 = solver.solve(objective1, minimize=True)

    if solution1.status != Status.OPTIMAL:
        print('Failed to solve first problem.')
        return None, None, solution1, None

    # Step 2: keep the deviations at their step-1 level and minimize the 'u' fluxes.
    opt_tol = 1e-6

    for g_id in rel_expression:
        solver.add_constraint(g_id + '_o+', {g_id + '_+': 1}, '<', solution1.values[g_id + '_+'] + opt_tol,
                              update_problem=False)
        solver.add_constraint(g_id + '_o-', {g_id + '_-': 1}, '<', solution1.values[g_id + '_-'] + opt_tol,
                              update_problem=False)

    solver.update()

    objective2 = {}
    for r_id in model.u_reactions:
        objective2[r_id + '_a'] = 1
        objective2[r_id + '_b'] = 1

    solution2 = solver.solve(objective2, minimize=True)

    if solution2.status != Status.OPTIMAL:
        print('Failed to solve second problem.')
        return None, None, solution1, solution2

    fluxes_a = {r_id: solution2.values[r_id + '_a'] for r_id in model.reactions}
    fluxes_b = {r_id: solution2.values[r_id + '_b'] for r_id in model.reactions}

    fluxes_a = model.convert_fluxes(fluxes_a)
    fluxes_b = model.convert_fluxes(fluxes_b)

    return fluxes_a, fluxes_b, solution1, solution2
def multiGapFill(model, universe, media, media_db, min_growth=0.1, max_uptake=10, scores=None, inplace=True,
                 bigM=1e3, exchange_format=None, pool_size=0, pool_gap=None, int_constr=0.00000001):
    """ Gap fill a metabolic model for the first known growth medium in a list

    Only the first entry of *media* that is present in *media_db* is used: the function
    returns right after gap filling for that medium.

    Args:
        model (CBModel): original model
        universe (CBModel): universe model (the lower bound of its 'R_EX' reactions that are
            missing from the model is set to 0 in place)
        media (list): list of growth media ids
        media_db (dict): growth media database (see notes)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        scores (dict): reaction scores (optional, see *gapFill* for details)
        inplace (bool): modify given model in place (default: True)
        bigM (float): maximal reaction flux (default: 1000)
        exchange_format (str): forwarded to *medium_to_constraints* (optional)
        pool_size (int): solution pool of given size (optional)
        pool_gap (float): for MIP algo; maximum relative gap for solutions in pool (optional)
        int_constr (float): forwarded to *gapFill* (default: 1e-8)

    Returns:
        the value returned by *gapFill* for the first medium found in *media_db*,
        or None when none of the given media is in the database

    Notes:
        *media_db* is a dict from medium name to the list of respective compounds.
    """

    target = model if inplace else model.copy()

    # Close the uptake direction of universe exchange reactions that the model lacks.
    for candidate in set(universe.reactions) - set(target.reactions):
        if candidate.startswith('R_EX'):
            universe.set_lower_bound(candidate, 0)

    combined = merge_models(target, universe, inplace=False)
    shared_solver = solver_instance(combined)

    # For Architect: only one medium is used, so return as soon as one has been gap filled.
    # The actual value of media does not matter.
    for name in media:
        if name not in media_db:
            print('Medium {} not in database, ignored.'.format(name))
            continue

        medium_constraints = medium_to_constraints(combined, media_db[name], max_uptake=max_uptake,
                                                   inplace=False, exchange_format=exchange_format,
                                                   verbose=False)

        return gapFill(target, universe, constraints=medium_constraints, min_growth=min_growth,
                       scores=scores, inplace=True, bigM=bigM, solver=shared_solver, tag=name,
                       pool_size=pool_size, pool_gap=pool_gap, int_constr=int_constr)
def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-5, bigM=1e3, default_score=-1.0,
                     uptake_score=1, soft_score=1.0, soft_constraints=None, hard_constraints=None,
                     ref_reactions=None, ref_score=0.0, solver=None, debug_output=None, feast=1e-6, opti=1e-5):
    """ Apply minmax reduction algorithm (MILP).

    Computes a binary reaction vector that optimizes the agreement with reaction scores (maximizes positive scores,
    and minimizes negative scores). It generates a fully connected reaction network (i.e. all reactions must be able
    to carry some flux).

    Notes:
        The MILP variables and constraints are only created the first time a given solver is
        used (tracked through the ``_carveme_flag`` attribute set on the solver); later calls
        with the same solver only rebuild the objective.

    Args:
        model (CBModel): universal model
        scores (dict): reaction scores
        min_growth (float): minimal growth constraint
        min_atpm (float): minimal maintenance ATP constraint
        eps (float): minimal flux required to consider leaving the reaction in the model
        bigM (float): maximal reaction flux
        default_score (float): penalty score for reactions without an annotation score (default: -1.0).
        uptake_score (float): objective coefficient of the indicator variables of reactions
            whose id ends with '_E' (default: 1).
        soft_score (float): score for soft constraints (default: 1.0)
        soft_constraints (dict): dictionary from reaction id to expected flux direction (-1, 1, 0)
        hard_constraints (dict): dictionary of flux bounds
        ref_reactions (list): reference reactions, scored as ``2 * score + ref_score`` when
            ref_score is non-zero (optional)
        ref_score (float): score bonus for reference reactions (default: 0.0)
        solver (Solver): solver instance (optional)
        debug_output (str): file name prefix; the problem is written to '<prefix>_milp_problem.lp' (optional)
        feast (float): integer feasibility tolerance passed to the solver (default: 1e-6)
        opti (float): optimality tolerance passed to the solver (default: 1e-5)

    Returns:
        result of the solver call, which is made with pool_size=50 and pool_gap=0
    """

    if not solver:
        solver = solver_instance(model)

    # The defaults used to be shared mutable lists; None is normalized here instead.
    ref_reactions = [] if ref_reactions is None else list(ref_reactions)
    ref_set = set(ref_reactions)

    objective = {}

    scores = scores.copy()

    # `reactions` keeps the insertion order (it drives the order in which variables are
    # created); `selected` mirrors it as a set so that membership tests are O(1) instead of
    # scanning the list for every reaction of the model.
    reactions = []
    selected = set()

    def _select(r_id):
        reactions.append(r_id)
        selected.add(r_id)

    for r_id in model.reactions:
        if r_id not in selected and not r_id.endswith('_E'):
            _select(r_id)

    if soft_constraints:
        extra = [r_id for r_id in soft_constraints if r_id not in selected]
        reactions += extra
        selected.update(extra)
    else:
        soft_constraints = {}

    if hard_constraints:
        solver.set_bounds(hard_constraints)

    # R_UF01847_CE is the ATP maintenance reaction
    # if the default score is not 0, set all the remaining reactions to the default score except for the
    # exchange reactions and the ATP maintenance reaction
    # NOTE(review): every model reaction not ending in '_E' has already been selected above, so
    # this condition can never hold and `default_score` is effectively unused; reactions that
    # are missing from `scores` raise KeyError when the objective is built. Confirm intent.
    if default_score != 0:
        for r_id in model.reactions:
            if (r_id not in selected and r_id not in ref_set
                    and not r_id.endswith('_E') and r_id != 'R_UF01847_CE'):
                scores[r_id] = default_score
                _select(r_id)

    if ref_score != 0:
        for r_id in ref_reactions:
            if r_id not in selected and r_id != 'R_UF01847_CE':
                scores[r_id] = ref_score
                _select(r_id)

    if not hasattr(solver, '_carveme_flag'):
        solver._carveme_flag = True

        solver.add_constraint('min_growth', {'R_BIOMASS': 1}, '>', min_growth, update_problem=False)
        solver.add_constraint('min_atpm', {'R_UF01847_CE': 1}, '>', min_atpm, update_problem=False)

        solver.neg_vars = []
        solver.pos_vars = []

        # One binary per feasible direction: 'yr_' for reverse flux, 'yf_' for forward flux.
        for r_id in reactions:
            reaction = model.reactions[r_id]
            if reaction.lb is None or reaction.lb < 0:
                y_r = 'yr_' + r_id
                solver.add_variable(y_r, 0, 1, vartype=VarType.BINARY, update_problem=False)
                solver.neg_vars.append(y_r)
            if reaction.ub is None or reaction.ub > 0:
                y_f = 'yf_' + r_id
                solver.add_variable(y_f, 0, 1, vartype=VarType.BINARY, update_problem=False)
                solver.pos_vars.append(y_f)

        if uptake_score != 0:
            for r_id in model.reactions:
                if r_id.endswith('_E'):
                    solver.add_variable('y_' + r_id, 0, 1, vartype=VarType.BINARY, update_problem=False)

        solver.update()

        neg_vars = set(solver.neg_vars)
        pos_vars = set(solver.pos_vars)

        # Link fluxes and indicators: an active direction must carry at least `eps`,
        # an inactive one carries nothing, and at most one direction is active.
        for r_id in reactions:
            y_r, y_f = 'yr_' + r_id, 'yf_' + r_id
            has_r, has_f = y_r in neg_vars, y_f in pos_vars

            if has_r and has_f:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_f: -eps, y_r: bigM}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_f: -bigM, y_r: eps}, '<', 0, update_problem=False)
                solver.add_constraint('rev_' + r_id, {y_f: 1, y_r: 1}, '<', 1, update_problem=False)
            elif has_f:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_f: -eps}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_f: -bigM}, '<', 0, update_problem=False)
            elif has_r:
                solver.add_constraint('lb_' + r_id, {r_id: 1, y_r: bigM}, '>', 0, update_problem=False)
                solver.add_constraint('ub_' + r_id, {r_id: 1, y_r: eps}, '<', 0, update_problem=False)

        if uptake_score != 0:
            for r_id in model.reactions:
                if r_id.endswith('_E'):
                    solver.add_constraint('lb_' + r_id, {r_id: 1, 'y_' + r_id: bigM}, '>', 0,
                                          update_problem=False)

        solver.update()

    # The solver may have been prepared by an earlier call, so read the lists back from it.
    neg_vars = set(solver.neg_vars)
    pos_vars = set(solver.pos_vars)

    for r_id in reactions:
        y_r, y_f = 'yr_' + r_id, 'yf_' + r_id
        is_soft = r_id in soft_constraints
        is_ref = ref_score != 0 and r_id in ref_set

        if is_soft:
            sign = soft_constraints[r_id]

            if sign > 0:
                w_f, w_r = soft_score, 0
            elif sign < 0:
                w_f, w_r = 0, soft_score
            else:
                w_f, w_r = -soft_score, -soft_score

        # scores[r_id] is only looked up for indicators that exist, as before.
        if y_f in pos_vars:
            if is_soft:
                objective[y_f] = w_f
            elif is_ref:
                objective[y_f] = 2 * scores[r_id] + ref_score
            else:
                objective[y_f] = scores[r_id]

        if y_r in neg_vars:
            if is_soft:
                objective[y_r] = w_r
            elif is_ref:
                objective[y_r] = 2 * scores[r_id] + ref_score
            else:
                objective[y_r] = scores[r_id]

    if uptake_score != 0:
        for r_id in model.reactions:
            if r_id.endswith('_E') and r_id not in soft_constraints:
                objective['y_' + r_id] = uptake_score

    if debug_output:
        solver.write_to_file(debug_output + "_milp_problem.lp")

    solver.set_parameter(Parameter.INT_FEASIBILITY_TOL, feast)
    solver.set_parameter(Parameter.OPTIMALITY_TOL, opti)

    solutions = solver.solve(linear=objective, minimize=False, get_values=True, pool_size=50, pool_gap=0)

    return solutions
def metabolite_uptake_score(community, environment=None, min_mass_weight=False, min_growth=0.1, max_uptake=10.0,
                            abstol=1e-6, validate=False, n_solutions=100, pool_gap=0.5, verbose=True,
                            exclude=None):  # TODO: implement excluded
    """ Calculate frequency of metabolite requirement for species growth

    Zelezniak A. et al, Metabolic dependencies drive species co-occurrence in diverse microbial communities (PNAS 2015)

    Notes:
        *environment* (when given) is applied in place to ``community.merged``, and
        ``community.merged.biomass_reaction`` is reassigned for every organism.

    Args:
        community (Community): microbial community
        environment (Environment): metabolic environment
        min_mass_weight (bool): Prefer smaller compounds (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-6)
        validate (bool): validate solution using FBA (for debugging purposes, default: False)
        n_solutions (int): number of alternative solutions to calculate (default: 100)
        pool_gap (float): forwarded to *minimal_medium* as *pool_gap* (default: 0.5)
        verbose (bool): emit warnings (default: True)
        exclude: currently unused (see TODO on the signature)

    Returns:
        dict: Keys are organism names, values are dictionaries with metabolite frequencies
            (None for organisms without a minimal growth medium)
    """

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)

    scores = {}

    # A single solver is shared by the minimal medium problems of all organisms.
    solver = solver_instance(community.merged)

    for org_id, exchange_rxns in community.organisms_exchange_reactions.items():
        biomass_reaction = community.organisms_biomass_reactions[org_id]
        community.merged.biomass_reaction = biomass_reaction

        medium_list, _ = minimal_medium(community.merged, exchange_reactions=exchange_rxns.keys(),
                                        min_mass_weight=min_mass_weight, min_growth=min_growth,
                                        n_solutions=n_solutions, max_uptake=max_uptake, validate=validate,
                                        abstol=abstol, use_pool=True, pool_gap=pool_gap, solver=solver,
                                        warnings=verbose)

        # Fix: with n_solutions == 1 minimal_medium returns a single medium instead of a list
        # of media. Chaining over it iterated the characters of the reaction ids, so every
        # frequency came out wrong. Wrap it so both cases are counted the same way.
        if n_solutions == 1 and medium_list is not None:
            medium_list = [medium_list]

        if medium_list:
            counter = Counter(chain(*medium_list))
            n_media = float(len(medium_list))

            scores[org_id] = {cnm.original_metabolite: counter[ex] / n_media
                              for ex, cnm in exchange_rxns.items()}
        else:
            if verbose:
                warn('MUS: Failed to find a minimal growth medium for ' + org_id)
            scores[org_id] = None

    return scores
def minimal_medium(model, exchange_reactions=None, direction=-1, min_mass_weight=False, min_growth=1,
                   max_uptake=100, max_compounds=None, n_solutions=1, validate=True, abstol=1e-6):
    """ Minimal medium calculator. Determines the minimum number of medium components for the organism to grow.

    Notes:
        There are two options provided:
            * simply minimize the total number of components
            * minimize nutrients by molecular weight (as implemented by Zarecki et al, 2014)

    Args:
        model (CBModel): model
        exchange_reactions: list of exchange reactions (if not provided all model exchange reactions are used)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 1)
        max_uptake (float): maximum uptake rate (default: 100)
        max_compounds (int): limit maximum number of compounds (optional)
        n_solutions (int): enumerate multiple solutions (default: 1)
        validate (bool): validate the first solution using FBA (for debugging purposes, default: True)
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-6)

    Returns:
        n_solutions == 1: (set of exchange reactions, Solution)
        n_solutions > 1: (list of sets, list of Solutions)
        (None, Solution) whenever the first optimization is not optimal
    """

    if exchange_reactions is None:
        exchange_reactions = list(model.get_exchange_reactions())

    uptake_is_negative = direction < 0

    def active_uptakes(sol):
        # Exchange reactions whose flux exceeds the tolerance in the uptake direction.
        if direction < 0:
            return {r_id for r_id in exchange_reactions if sol.values[r_id] < -abstol}
        if direction > 0:
            return {r_id for r_id in exchange_reactions if sol.values[r_id] > abstol}
        return set()

    solver = solver_instance(model)

    # Open the uptake side of every candidate exchange reaction and require growth.
    if uptake_is_negative:
        solver.set_lower_bounds(dict.fromkeys(exchange_reactions, -max_uptake))
    else:
        solver.set_upper_bounds(dict.fromkeys(exchange_reactions, max_uptake))

    solver.set_lower_bounds({model.biomass_reaction: min_growth})

    # Binary indicator per exchange reaction.
    for r_id in exchange_reactions:
        solver.add_variable('y_' + r_id, 0, 1, vartype=VarType.BINARY, update_problem=False)

    solver.update()

    # Uptake is only possible while the indicator is switched on.
    if uptake_is_negative:
        coefficient, sense = max_uptake, '>'
    else:
        coefficient, sense = -max_uptake, '<'

    for r_id in exchange_reactions:
        solver.add_constraint('c_' + r_id, {r_id: 1, 'y_' + r_id: coefficient}, sense, 0,
                              update_problem=False)

    if max_compounds:
        total = {'y_' + r_id: 1 for r_id in exchange_reactions}
        solver.add_constraint('max_cmpds', total, '<', max_compounds, update_problem=False)

    solver.update()

    if not min_mass_weight:
        objective = {'y_' + r_id: 1 for r_id in exchange_reactions}
    else:
        objective = {}

        for r_id in exchange_reactions:
            reaction = model.reactions[r_id]
            compounds = reaction.get_substrates() if uptake_is_negative else reaction.get_products()
            n_compounds = len(compounds)

            if n_compounds > 1:
                warn('Multiple compounds in exchange reaction (ignored)')
                continue

            if n_compounds == 0:
                warn('No compounds in exchange reaction (ignored)')
                continue

            metadata = model.metabolites[compounds[0]].metadata

            if 'FORMULA' not in metadata:
                warn('No formula for compound (ignored)')
                continue

            formulas = metadata['FORMULA'].split(';')

            if len(formulas) > 1:
                warn('Multiple formulas for compound')

            objective['y_' + r_id] = molecular_weight(formulas[0])

    solution = solver.solve(objective, minimize=True)

    if solution.status != Status.OPTIMAL:
        return None, solution

    medium = active_uptakes(solution)

    if validate:
        validate_solution(model, medium, exchange_reactions, direction, min_growth, max_uptake)

    if n_solutions == 1:
        return medium, solution

    # Enumerate alternatives: each round forbids the medium found in the previous one.
    medium_list, solutions = [medium], [solution]

    for i in range(1, n_solutions):
        cut = {'y_' + r_id: 1 for r_id in medium}
        solver.add_constraint('iteration_{}'.format(i), cut, '<', len(cut) - 1)
        solution = solver.solve(objective, minimize=True)

        if solution.status != Status.OPTIMAL:
            break

        medium = active_uptakes(solution)
        medium_list.append(medium)
        solutions.append(solution)

    return medium_list, solutions
def minimal_medium(model, exchange_reactions=None, direction=-1, min_mass_weight=False, min_growth=1,
                   max_uptake=100, max_compounds=None, n_solutions=1, validate=True, abstol=1e-6,
                   warnings=True, use_pool=False, pool_gap=None, solver=None, milp=True):
    """ Minimal medium calculator. Determines the minimum number of medium components for the organism to grow.

    Notes:
        There are two options provided:
            * simply minimize the total number of components
            * minimize nutrients by molecular weight (as implemented by Zarecki et al, 2014)

        With the MILP formulation (default) one binary variable 'y_<r_id>' is added per exchange
        reaction; with milp=False a continuous variable 'f_<r_id>' in [0, max_uptake] is added instead
        and the weighted sum of these variables is minimized.

    Args:
        model (CBModel): model
        exchange_reactions: list of exchange reactions (if not provided all model exchange reactions are used)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 1)
        max_uptake (float): maximum uptake rate (default: 100)
        max_compounds (int): limit maximum number of compounds (optional, MILP only)
        n_solutions (int): enumerate multiple solutions (default: 1, values above 1 are MILP only)
        validate (bool): validate solution using FBA (for debugging purposes, default: True);
            only applied when n_solutions is 1
        abstol (float): tolerance for detecting a non-zero exchange flux (default: 1e-6)
        warnings (bool): emit warnings (default: True)
        use_pool (bool): when n_solutions > 1, request all solutions from a single solve call
            (pool_size / pool_gap are forwarded to the solver) instead of iterating (default: False)
        pool_gap (float): pool gap forwarded to the solver when use_pool is set (optional)
        solver (Solver): solver instance to reuse (optional); if given, everything added to it here
            is flagged non-persistent and solver.clean_up() is called before returning
        milp (bool): use the MILP formulation (default: True)

    Returns:
        list: minimal set of exchange reactions (None if no optimal solution was found);
            a list of such sets when n_solutions > 1
        Solution: solution from solver (a list of solutions when n_solutions > 1)

    Raises:
        RuntimeError: if max_compounds or n_solutions > 1 is requested without the MILP formulation
    """

    def warn_wrapper(message):
        if warnings:
            warn(message)

    # Check the arguments first so that no solver is built or modified for an invalid request.
    if not milp and max_compounds is not None:
        raise RuntimeError("max_compounds can only be used with MILP formulation")

    if not milp and n_solutions > 1:
        raise RuntimeError("n_solutions can only be used with MILP formulation")

    if exchange_reactions is None:
        exchange_reactions = model.get_exchange_reactions()

    # A solver created here is private, so additions may persist in it. Additions to a
    # caller-supplied solver are flagged non-persistent and cleaned up before returning.
    if not solver:
        solver = solver_instance(model)
        persistent = True
    else:
        persistent = False

    # Name prefix of the auxiliary variable attached to each exchange reaction.
    prefix = 'y_' if milp else 'f_'

    if milp:
        for r_id in exchange_reactions:
            solver.add_variable('y_' + r_id, 0, 1, vartype=VarType.BINARY,
                                update_problem=False, persistent=persistent)
    else:
        for r_id in exchange_reactions:
            solver.add_variable('f_' + r_id, 0, max_uptake,
                                update_problem=False, persistent=persistent)

    solver.update()

    if milp:
        # Couple each flux to its indicator: the flux can only be non-zero when y = 1.
        for r_id in exchange_reactions:
            if direction < 0:
                solver.add_constraint('c_' + r_id, {r_id: 1, 'y_' + r_id: max_uptake}, '>', 0,
                                      update_problem=False, persistent=persistent)
            else:
                solver.add_constraint('c_' + r_id, {r_id: 1, 'y_' + r_id: -max_uptake}, '<', 0,
                                      update_problem=False, persistent=persistent)

        if max_compounds:
            lhs = {'y_' + r_id: 1 for r_id in exchange_reactions}
            solver.add_constraint('max_cmpds', lhs, '<', max_compounds,
                                  update_problem=False, persistent=persistent)
    else:
        # LP relaxation: f bounds the magnitude of the flux in the uptake direction.
        for r_id in exchange_reactions:
            if direction < 0:
                solver.add_constraint('c_' + r_id, {r_id: 1, 'f_' + r_id: 1}, '>', 0,
                                      update_problem=False, persistent=persistent)
            else:
                solver.add_constraint('c_' + r_id, {r_id: 1, 'f_' + r_id: -1}, '<', 0,
                                      update_problem=False, persistent=persistent)

    solver.update()

    if min_mass_weight:
        objective = {}
        valid_reactions = set()

        multiple_compounds = []
        no_compounds = []
        no_formula = []
        multiple_formulas = []
        invalid_formulas = []

        # Reactions skipped here get no objective weight and are left out of valid_reactions,
        # so their uptake bound is fixed to 0 when the constraints are built below.
        for r_id in exchange_reactions:
            reaction = model.reactions[r_id]

            if direction < 0:
                compounds = reaction.get_substrates()
            else:
                compounds = reaction.get_products()

            if len(compounds) > 1:
                multiple_compounds.append(r_id)
                continue

            if len(compounds) == 0:
                no_compounds.append(r_id)
                continue

            metabolite = model.metabolites[compounds[0]]

            if 'FORMULA' not in metabolite.metadata:
                no_formula.append(metabolite.id)
                continue

            formulas = metabolite.metadata['FORMULA'].split(';')

            if len(formulas) > 1:
                multiple_formulas.append(metabolite.id)

            weight = molecular_weight(formulas[0])

            if weight is None:
                invalid_formulas.append(metabolite.id)
                continue

            objective[prefix + r_id] = weight
            valid_reactions.add(r_id)

        if multiple_compounds:
            warn_wrapper("Reactions ignored (multiple compounds): " + ','.join(multiple_compounds))
        if no_compounds:
            warn_wrapper("Reactions ignored (no compounds): " + ','.join(no_compounds))
        if no_formula:
            warn_wrapper("Compounds ignored (no formula): " + ','.join(no_formula))
        if multiple_formulas:
            warn_wrapper("Compounds with multiple formulas (using first): " + ','.join(multiple_formulas))
        if invalid_formulas:
            warn_wrapper("Compounds ignored (invalid formula): " + ','.join(invalid_formulas))
    else:
        objective = {prefix + r_id: 1 for r_id in exchange_reactions}
        valid_reactions = set(exchange_reactions)

    result, ret_sols = None, None

    # Open the uptake direction (up to max_uptake) only for usable reactions; the opposite
    # bound is taken from the model.
    if direction < 0:
        constraints = {r_id: (-max_uptake if r_id in valid_reactions else 0, model.reactions[r_id].ub)
                       for r_id in exchange_reactions}
    else:
        constraints = {r_id: (model.reactions[r_id].lb, max_uptake if r_id in valid_reactions else 0)
                       for r_id in exchange_reactions}

    constraints[model.biomass_reaction] = (min_growth, None)

    if n_solutions == 1:
        solution = solver.solve(objective, minimize=True, constraints=constraints,
                                get_values=exchange_reactions)

        if solution.status != Status.OPTIMAL:
            warn_wrapper('No solution found')
            result, ret_sols = None, solution
        else:
            medium = get_medium(solution, exchange_reactions, direction, abstol)

            if validate:
                validate_solution(model, medium, exchange_reactions, direction, min_growth, max_uptake)

            result, ret_sols = medium, solution

    elif use_pool:
        solutions = solver.solve(objective, minimize=True, constraints=constraints,
                                 get_values=exchange_reactions, pool_size=n_solutions, pool_gap=pool_gap)

        if solutions is None:
            result, ret_sols = [], []
        else:
            media = [get_medium(solution, exchange_reactions, direction, abstol)
                     for solution in solutions]
            result, ret_sols = media, solutions

    else:
        media = []
        solutions = []

        for i in range(n_solutions):
            if i > 0:
                # Integer cut: forbid selecting every member of the previous medium at once.
                constr_id = 'iteration_{}'.format(i)
                previous_sol = {'y_' + r_id: 1 for r_id in medium}
                # Same persistence flag as every other addition, so cuts added to a
                # caller-supplied solver are handled by clean_up() like the rest.
                solver.add_constraint(constr_id, previous_sol, '<', len(previous_sol) - 1,
                                      persistent=persistent)

            solution = solver.solve(objective, minimize=True, constraints=constraints,
                                    get_values=exchange_reactions)

            if solution.status != Status.OPTIMAL:
                break

            medium = get_medium(solution, exchange_reactions, direction, abstol)
            media.append(medium)
            solutions.append(solution)

        result, ret_sols = media, solutions

    if not persistent:
        solver.clean_up()

    return result, ret_sols