def do_many_prunes(network, export, reps, ins, outs):
    '''
    Given a string_chem_net object and whether or not to allow export, prune
    the network many times and record the sizes of the pruned networks

    Arguments:
    - network: object whose met_list and rxn_list attributes are handed to
      scn.make_cobra_model
    - export: forwarded to scn.make_cobra_model as allow_export
    - reps: number of independent prunes to perform
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass precursors handed to scn.choose_bm_mets

    Returns a DataFrame with one row per prune and the columns 'rxn_count'
    and 'met_count' (empty, but with both columns, when reps is 0)
    '''
    # create the COBRApy model
    model = scn.make_cobra_model(
        network.met_list,
        network.rxn_list,
        allow_export=export
    )
    columns = ['rxn_count', 'met_count']
    # collect one [rxn_count, met_count] row per prune and build the
    # DataFrame once at the end; DataFrame.append copied the whole frame on
    # every call and no longer exists in pandas 2.x
    rows = []
    # prune this network reps times
    for rep in range(reps):
        if rep % 10 == 0:
            print(f'On prune {rep} of {reps}')
        # work with a copy of the model so it remains untouched for the next
        # iteration of the loop
        full_model = model.copy()
        # randomly choose the appropriate number of input and output mets
        bm_rxn = scn.choose_bm_mets(outs, full_model)
        scn.choose_inputs(ins, full_model, bm_rxn)
        full_model.objective = bm_rxn
        # see if there's a feasible solution on the full model
        solution = full_model.optimize()
        # can't just check solution.status because sometimes it's feasible
        # but the flux through the biomass reaction is vanishingly small
        # (note that the literal 10e-10 is 1e-9)
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
            # if the solution isn't feasible, pick a different environment
            in_rxns = [
                # don't want to remove all boundary reactions because that
                # would also remove all of the export reactions
                rxn for rxn in full_model.boundary
                if rxn.id.startswith('->')
            ]
            full_model.remove_reactions(in_rxns)
            # the replacement environment must have the same number of input
            # metabolites as the first attempt, i.e. ins (this call used to
            # pass outs by mistake)
            scn.choose_inputs(ins, full_model, bm_rxn)
            solution = full_model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        # now that we know there's at least one environment that supports
        # growth with this biomass reaction, we can prune the universal
        # network
        pruned_model = scn.min_flux_prune(full_model, bm_rxn)
        # count reactions and metabolites
        rxn_count = len(pruned_model.reactions)
        # metabolites aren't automatically removed when all of their
        # reactions are removed, so only count the ones that still
        # participate in at least one reaction
        met_count = sum(
            1 for m in pruned_model.metabolites if len(m.reactions) > 0
        )
        rows.append([rxn_count, met_count])
    return pd.DataFrame(rows, columns=columns)
def prune_once(universal_model, ins, outs, flux_bins, rep):
    '''
    Prune one randomized copy of a universal string chemistry network and
    return the degree and flux distributions of the result

    A random biomass reaction (outs precursors) and random input reactions
    (ins nutrients) are added to a copy of universal_model; the inputs are
    re-drawn until the combination can produce biomass, and then the copy is
    pruned with scn.min_flux_prune

    Arguments:
    - universal_model: model to copy; the original is not modified here
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass precursors handed to scn.choose_bm_mets
    - flux_bins: not referenced anywhere in this function
    - rep: label written to the 'trial' column of both returned DataFrames

    Returns the tuple (deg_dist, flux_dist)
    '''
    # never touch the caller's model; everything happens on a private copy
    working_model = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, working_model)
    scn.choose_inputs(ins, working_model, biomass)
    working_model.objective = biomass

    # keep drawing environments until one supports a non-negligible biomass
    # flux; the status alone is not enough because a feasible solution can
    # still carry a vanishingly small flux through the biomass reaction
    while True:
        result = working_model.optimize()
        biomass_flux = result.fluxes.get(key=biomass.id)
        if result.status != 'infeasible' and not biomass_flux < 10e-10:
            break
        # only drop the input reactions (ids starting with '->'); removing
        # every boundary reaction would also remove the export reactions
        stale_inputs = []
        for boundary_rxn in working_model.boundary:
            if boundary_rxn.id.startswith('->'):
                stale_inputs.append(boundary_rxn)
        working_model.remove_reactions(stale_inputs)
        scn.choose_inputs(ins, working_model, biomass)

    # this environment supports growth, so the network can be pruned
    pruned = scn.min_flux_prune(working_model, biomass)

    # degree distribution first, then the flux magnitudes
    degrees = make_deg_dist(pruned)
    magnitudes = abs(pruned.optimize().fluxes)
    # fluxes that are approximately zero are left out
    kept_fluxes = pd.DataFrame(magnitudes[magnitudes > 10e-10])
    kept_fluxes.columns = ['flux']

    # tag both tables with the round of pruning they came from
    for table in (degrees, kept_fluxes):
        table['trial'] = rep
    return (degrees, kept_fluxes)
def prune_model(universal_model, ins, outs):
    '''
    Prune a copy of the given universal model using the specified number of
    input and output metabolites and summarize the result

    Returns a list of four strings: ins, outs, the ratio of reactions to
    still-connected metabolites in the pruned network, and the fraction of
    the universal model's reactions that were pruned away
    '''
    # never touch the caller's model; everything happens on a private copy
    working_model = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, working_model)
    scn.choose_inputs(ins, working_model, biomass)
    working_model.objective = biomass

    # keep drawing environments until one supports a non-negligible biomass
    # flux; the status alone is not enough because a feasible solution can
    # still carry a vanishingly small flux through the biomass reaction
    while True:
        result = working_model.optimize()
        biomass_flux = result.fluxes.get(key=biomass.id)
        if result.status != 'infeasible' and not biomass_flux < 10e-10:
            break
        # only drop the input reactions (ids starting with '->'); removing
        # every boundary reaction would also remove the export reactions
        stale_inputs = []
        for boundary_rxn in working_model.boundary:
            if boundary_rxn.id.startswith('->'):
                stale_inputs.append(boundary_rxn)
        working_model.remove_reactions(stale_inputs)
        scn.choose_inputs(ins, working_model, biomass)

    # this environment supports growth, so the network can be pruned
    pruned = scn.min_flux_prune(working_model, biomass)

    # metabolites stay in the model after losing all of their reactions, so
    # only the ones that still take part in a reaction are counted
    connected_mets = sum(
        1 for met in pruned.metabolites if len(met.reactions) > 0
    )
    kept_rxns = len(pruned.reactions)
    total_rxns = len(universal_model.reactions)

    ratio = kept_rxns / connected_mets
    pruned_pct = (total_rxns - kept_rxns) / total_rxns

    # everything is returned as a string so the caller can join it later
    return list(map(str, (ins, outs, ratio, pruned_pct)))
# flux through the biomass reaction is vanishingly small bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) while solution.status == 'infeasible' or bm_rxn_flux < 10e-10: # if the solution isn't feasible, pick a different environment in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in full_model.boundary if rxn.id.startswith('->') ] full_model.remove_reactions(in_rxns) scn.choose_inputs(condition[0], full_model, bm_rxn) solution = full_model.optimize() bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) # now that we know there's at least one environment that supports growth # with this biomass reaction, we can prune the universal network pruned_model = scn.min_flux_prune(full_model, bm_rxn) # metabolites aren't automatically removed when all of their reactions # are removed, so find out how many metabolites are left met_count = len( [m for m in pruned_model.metabolites if len(m.reactions) > 0]) # compute the reaction-to-metabolite ratio and % of pruned reactions ratio = len(pruned_model.reactions) / met_count pruned_count = len(universal_model.reactions) - len( pruned_model.reactions) pruned_pct = pruned_count / len(universal_model.reactions) output = [condition[0], condition[1], ratio, pruned_pct] # make everything a string so we can join it later output_data.append([str(x) for x in output]) with open( f'data/varied_{monos}_{max_pol}_{min_ins}to{max_ins}ins_' +
def prune_many_times(arglist):
    '''
    Given:
    - COBRApy model representing a full/complete/un-pruned string chemistry
    - A number of nutrient sources
    - A number of biomass precursors
    - A number of different sets of nutrient sources
    - A number of times to change stoichiometric coefficients in the biomass
      reaction
    (packed, in that order, into the single sequence arglist)

    Do:
    - Create a biomass reaction with the designated number of reactants
    - Create the designated number of variants on that reaction with
      randomized stoichiometric coefficients (each reactant's coefficient is
      assigned to a random integer between 1 and 10)
    - Choose the designated number of sets of the designated number of
      nutrient sources
    - Prune the network once for each combination of biomass reaction and
      set of nutrients

    Return a DataFrame with one row per pruned network and the columns:
    - biomass: '-'-joined IDs of the biomass precursors
    - env: '-'-joined IDs of the nutrient sources used
    - rxn_incl: whatever scn.make_rxn_incl returns for the full and pruned
      networks
    - growth: flux through the biomass reaction on the pruned network
    The DataFrame is empty (but has all four columns) if nothing was pruned

    NOTE: full_model itself gains (and immediately loses) a biomass reaction;
    all other work happens on copies. The loop that searches for a usable
    environment has no retry limit.
    '''
    # everything arrives in one sequence so the function takes a single
    # argument (the original author mentions using it with mp.map())
    (full_model, ins, outs, envs, combos) = arglist
    # add a biomass reaction but remove it from the model immediately; it is
    # only used as the template for the variants created below
    bm_rxn = scn.choose_bm_mets(outs, full_model)
    full_model.remove_reactions([bm_rxn])

    # keep lists of the environments used, the reaction-inclusion vectors of
    # the pruned networks and the growth rates on the pruned networks; these
    # are created once, before the loop, so the results from every
    # coefficient set end up in the returned DataFrame
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []

    # loop over variants of the biomass reaction with different coefficients
    # but identical reactants
    for combo in range(combos):
        if (combo + 1) % 10 == 0:
            print(f'On coefficient set {combo+1} of {combos}')
        # make a new biomass reaction that consumes each precursor with a
        # random integer coefficient
        new_bm = cobra.Reaction('varied_bm_rxn')
        new_bm.add_metabolites(
            {m: -random.randint(1, 10) for m in bm_rxn.metabolites}
        )
        # make a copy of the model before adding the new biomass reaction
        model = full_model.copy()
        # add_reactions replaces the deprecated single-reaction add_reaction
        model.add_reactions([new_bm])
        model.objective = new_bm

        # count successful environments only, so an environment that cannot
        # produce biomass is simply redrawn
        found = 0
        while found < envs:
            # remove existing input reactions
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            model.remove_reactions(in_rxns)
            # choose new input reactions
            scn.choose_inputs(ins, model, new_bm)
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            # see if this choice of metabolites can produce the biomass on
            # this network
            solution = model.optimize()
            if (solution.status == 'infeasible'
                    or solution.fluxes.get(key=new_bm.id) < 1e-10):
                # try again with a different environment
                continue
            found += 1
            if found % 100 == 0:
                print(f'On environment {found}')
            # record the food source metabolites that worked
            food_mets.append('-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites]
            ))
            # prune the network
            pruned_net = scn.min_flux_prune(model, new_bm)
            rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
            # get the growth rate on the pruned network
            pruned_solution = pruned_net.optimize()
            pruned_growths.append(pruned_solution.fluxes.get(key=new_bm.id))

    # naming the columns in the constructor keeps this working when the
    # lists are empty (assigning three names to a zero-column frame raises)
    data = pd.DataFrame(
        list(zip(food_mets, rxn_incl_vecs, pruned_growths)),
        columns=['env', 'rxn_incl', 'growth']
    )
    # add a column with the biomass components (same value in every row)
    data['biomass'] = '-'.join([met.id for met in bm_rxn.metabolites])
    # reorder columns
    return data[['biomass', 'env', 'rxn_incl', 'growth']]
# flux through the biomass reaction is vanishingly small bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id) while solution.status == 'infeasible' or bm_rxn_flux < 10e-10: # if the solution isn't feasible, pick a different environment in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in universal_model.boundary if rxn.id.startswith('->') ] universal_model.remove_reactions(in_rxns) scn.choose_inputs(int(ins), universal_model, bm_rxn) solution = universal_model.optimize() bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id) # now that we know there's at least one environment that supports growth # with this biomass reaction, we can prune the universal network pruned_model = scn.min_flux_prune(universal_model, bm_rxn) # find growth in every environment for env in envs: # start by removing existing input reactions in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in pruned_model.boundary if rxn.id.startswith('->') ] pruned_model.remove_reactions(in_rxns) # while we have the pruned network with no input reactions, make the # reaction-inclusion vector bitstring = scn.make_rxn_incl(universal_model, pruned_model) # create new input reactions for met in env: in_rxn = cobra.Reaction(
pruned_nets = dict() i = 0 while i < int(bms): # pick a new biomass reaction and set it as the objective bm_rxn = scn.choose_bm_mets(int(outs), cobra_model) cobra_model.objective = bm_rxn # see if these biomass precursors can be produced on this environment # before we bother pruning solution = cobra_model.optimize() bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) if solution.status != 'infeasible' and bm_rxn_flux > 10e-10: # only increment the counter if we've chosen a usable biomass reaction i += 1 print(f'On biomass reaction {i}') # run the minimum flux pruner pruned_net = scn.min_flux_prune(cobra_model, bm_rxn) # remove the biomass reaction before making the reaction bitstring # can't just remove the reaction because somehow in the pruning process # the biomass reaction became different in some subtle way pruned_bm_rxn = pruned_net.reactions.get_by_id(bm_rxn.id) pruned_net.remove_reactions([pruned_bm_rxn]) bitstring = scn.make_rxn_incl(cobra_model, pruned_net) pruned_nets[bitstring] = [met.id for met in bm_rxn.metabolites] # remove this biomass reaction from the full network regardless of whether # it worked or not cobra_model.remove_reactions([bm_rxn]) # print a bunch of info but also write it out to a tsv with open( f'data/multiple_bm_min_prune_{monos}_{max_pol}_{ins}ins_' + f'{outs}outs_{bms}_bms.tsv', 'w') as out:
# choose new input and biomass reactions exp_bm_rxn = scn.choose_bm_mets(int(outs), export_model) no_exp_bm_rxn = exp_bm_rxn.copy() no_export_model.add_reaction(no_exp_bm_rxn) export_model.objective = exp_bm_rxn no_export_model.objective = no_exp_bm_rxn scn.choose_inputs(int(ins), no_export_model, no_exp_bm_rxn) export_model.add_reactions( [rxn.copy() for rxn in no_export_model.boundary]) # see if this combination works for both networks export_solution = export_model.optimize() no_export_solution = no_export_model.optimize() # do min-flux pruning and get reaction-inclusion vectors for both networks print('Using minimum-flux pruner.') min_pruned_export = scn.min_flux_prune(export_model, exp_bm_rxn) min_pruned_no_export = scn.min_flux_prune(no_export_model, no_exp_bm_rxn) min_export_count = len(min_pruned_export.reactions) min_no_export_count = len(min_pruned_no_export.reactions) min_export_rxn_incl = scn.make_rxn_incl(export_model, min_pruned_export) min_no_export_rxn_incl = scn.make_rxn_incl(no_export_model, min_pruned_no_export) # randomly prune each network as many times as specified print('Randomly pruning with export reactions.') (rand_export_pruned_rxn_counts, rand_export_prune_counts, rand_export_pruned_nets) = do_many_rand_prunes(export_model, exp_bm_rxn, int(reps)) print('Randomly pruning without export reactions.') (rand_no_export_pruned_rxn_counts, rand_no_export_prune_counts,
def prune_many_times(arglist):
    '''
    Add one random biomass reaction to a copy of the given model and prune
    that copy once for each of several randomly chosen environments

    arglist is a single sequence holding, in order:
    - full_model: the un-pruned COBRApy model (only a copy is modified)
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass precursors handed to scn.choose_bm_mets
    - envs: number of environments that must successfully produce biomass

    Return a DataFrame with one row per pruned network and the columns:
    - biomass: '-'-joined IDs of the biomass precursors
    - env: '-'-joined IDs of the nutrient sources used
    - rxn_incl: whatever scn.make_rxn_incl returns for the full and pruned
      networks
    - growth: flux through the biomass reaction on the pruned network
    The DataFrame is empty (but has all four columns) when envs is 0

    NOTE: the loop that searches for a usable environment has no retry limit.
    '''
    # everything arrives in one sequence so the function takes a single
    # argument (the original author mentions using it with mp.map())
    full_model, ins, outs, envs = arglist
    # start by making a copy of the original model so we don't have to remove
    # the biomass reaction each time
    model = full_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(outs, model)
    model.objective = bm_rxn

    # keep lists of the environments used, the reaction-inclusion vectors of
    # the pruned networks and the growth rates on the pruned networks
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []

    # count successful environments only, so an environment that cannot
    # produce biomass is simply redrawn
    found = 0
    while found < envs:
        # remove existing input reactions
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        model.remove_reactions(in_rxns)
        # choose new input reactions
        scn.choose_inputs(ins, model, bm_rxn)
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        # see if this choice of metabolites can produce the biomass on this
        # network
        solution = model.optimize()
        if (solution.status == 'infeasible'
                or solution.fluxes.get(key=bm_rxn.id) < 1e-10):
            # try again with a different environment
            continue
        found += 1
        if found % 100 == 0:
            print(f'On environment {found}')
        # record the food source metabolites that worked
        food_mets.append('-'.join(
            [met.id for rxn in in_rxns for met in rxn.metabolites]
        ))
        # prune the network
        pruned_net = scn.min_flux_prune(model, bm_rxn)
        rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
        # get the growth rate on the pruned network
        pruned_solution = pruned_net.optimize()
        pruned_growths.append(pruned_solution.fluxes.get(key=bm_rxn.id))

    # naming the columns in the constructor keeps this working when the
    # lists are empty (assigning three names to a zero-column frame raises)
    data = pd.DataFrame(
        list(zip(food_mets, rxn_incl_vecs, pruned_growths)),
        columns=['env', 'rxn_incl', 'growth']
    )
    # add a column with the biomass components (same value in every row)
    data['biomass'] = '-'.join([met.id for met in bm_rxn.metabolites])
    # reorder columns
    return data[['biomass', 'env', 'rxn_incl', 'growth']]