def do_many_prunes(network, export, reps, ins, outs):
    '''
    Given a string_chem_net object and whether or not to allow export, prune
    the network many times and record the sizes of the pruned networks

    Arguments:
    - network: object whose met_list and rxn_list are handed to
      scn.make_cobra_model
    - export: forwarded to scn.make_cobra_model as allow_export
    - reps: number of times to prune the network
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass metabolites handed to scn.choose_bm_mets

    Returns a DataFrame with one row per prune and the columns rxn_count and
    met_count (zero rows, same columns, if reps is 0)
    '''
    # create the COBRApy model
    model = scn.make_cobra_model(
        network.met_list,
        network.rxn_list,
        allow_export=export
    )
    # collect one [rxn_count, met_count] row per prune and build the DataFrame
    # once at the end; DataFrame.append no longer exists in pandas 2.0 and
    # re-copying the whole frame on every iteration was wasteful anyway
    rows = []
    # prune this network reps times
    for rep in range(reps):
        if rep % 10 == 0:
            print(f'On prune {rep} of {reps}')
        # work with a copy of the model so it remains untouched for the next
        # iteration of the loop
        full_model = model.copy()
        # randomly choose the appropriate number of input and output mets
        bm_rxn = scn.choose_bm_mets(outs, full_model)
        scn.choose_inputs(ins, full_model, bm_rxn)
        full_model.objective = bm_rxn
        # see if there's a feasible solution on the full model
        solution = full_model.optimize()
        # can't just check solution.status because sometimes it's feasible but
        # the flux through the biomass reaction is vanishingly small
        # (note that 10e-10 is 1e-9)
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        while solution.status == 'infeasible' or bm_rxn_flux < 10e-10:
            # if the solution isn't feasible, pick a different environment
            in_rxns = [
                # don't want to remove all boundary reactions because that
                # would also remove all of the export reactions
                rxn for rxn in full_model.boundary if rxn.id.startswith('->')
            ]
            full_model.remove_reactions(in_rxns)
            # the replacement environment must have `ins` input metabolites,
            # just like the first attempt (this call used to pass `outs`)
            scn.choose_inputs(ins, full_model, bm_rxn)
            solution = full_model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        # now that we know there's at least one environment that supports
        # growth with this biomass reaction, we can prune the universal network
        pruned_model = scn.min_flux_prune(full_model, bm_rxn)
        # count reactions and metabolites
        rxn_count = len(pruned_model.reactions)
        # metabolites aren't automatically removed when all of their reactions
        # are removed, so only count the ones that still have reactions
        met_count = sum(
            1 for m in pruned_model.metabolites if len(m.reactions) > 0
        )
        rows.append([rxn_count, met_count])
    return pd.DataFrame(rows, columns=['rxn_count', 'met_count'])
def prune_once(universal_model, ins, outs, flux_bins, rep):
    '''
    Copy the universal model, give the copy a random biomass reaction and
    random input reactions, re-draw the inputs until the biomass reaction
    carries flux, prune with scn.min_flux_prune, and return the degree and
    flux distributions of the pruned network

    Arguments:
    - universal_model: model to copy; the original is not modified here
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass metabolites handed to scn.choose_bm_mets
    - flux_bins: accepted but not used by this function
    - rep: value written to the 'trial' column of both returned dataframes

    Returns the tuple (deg_dist, flux_dist)
    '''
    # everything below happens on a private copy
    work_model = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, work_model)
    scn.choose_inputs(ins, work_model, biomass)
    work_model.objective = biomass
    # keep drawing environments until one gives a usable solution; the status
    # alone isn't enough because a feasible solution can still have a
    # vanishingly small biomass flux (10e-10 is 1e-9)
    while True:
        result = work_model.optimize()
        growth = result.fluxes.get(key=biomass.id)
        if not (result.status == 'infeasible' or growth < 10e-10):
            break
        # only drop the input reactions (IDs starting with '->'); dropping
        # every boundary reaction would also drop the export reactions
        stale_inputs = [
            rxn for rxn in work_model.boundary if rxn.id.startswith('->')
        ]
        work_model.remove_reactions(stale_inputs)
        scn.choose_inputs(ins, work_model, biomass)
    # this environment supports growth, so the network can be pruned
    pruned = scn.min_flux_prune(work_model, biomass)
    deg_dist = make_deg_dist(pruned)
    magnitudes = abs(pruned.optimize().fluxes)
    # leave out fluxes that are approximately zero
    nonzero = magnitudes[magnitudes > 10e-10]
    flux_dist = pd.DataFrame(nonzero)
    flux_dist.columns = ['flux']
    # tag both dataframes with the round of pruning they came from
    deg_dist['trial'] = rep
    flux_dist['trial'] = rep
    return deg_dist, flux_dist
def prune_model(universal_model, ins, outs):
    '''
    Prune a copy of the given universal model using the specified number of
    input and output metabolites and summarise the result

    Returns a list of four strings: ins, outs, the reaction-to-metabolite
    ratio of the pruned network, and the fraction of the universal model's
    reactions that are absent from the pruned network
    '''
    # the universal model itself is never modified; work on a copy
    work_model = universal_model.copy()
    biomass = scn.choose_bm_mets(outs, work_model)
    scn.choose_inputs(ins, work_model, biomass)
    work_model.objective = biomass
    # keep drawing environments until one gives a usable solution; the status
    # alone isn't enough because a feasible solution can still have a
    # vanishingly small biomass flux (10e-10 is 1e-9)
    while True:
        result = work_model.optimize()
        growth = result.fluxes.get(key=biomass.id)
        if not (result.status == 'infeasible' or growth < 10e-10):
            break
        # only drop the input reactions (IDs starting with '->'); dropping
        # every boundary reaction would also drop the export reactions
        stale_inputs = [
            rxn for rxn in work_model.boundary if rxn.id.startswith('->')
        ]
        work_model.remove_reactions(stale_inputs)
        scn.choose_inputs(ins, work_model, biomass)
    # this environment supports growth, so the network can be pruned
    pruned = scn.min_flux_prune(work_model, biomass)
    # metabolites stay in the model after all of their reactions are gone, so
    # only count the ones that still take part in a reaction
    live_mets = sum(1 for m in pruned.metabolites if len(m.reactions) > 0)
    kept = len(pruned.reactions)
    ratio = kept / live_mets
    removed = len(universal_model.reactions) - len(pruned.reactions)
    pruned_pct = removed / len(universal_model.reactions)
    # everything is returned as a string so it can be joined later
    return list(map(str, (ins, outs, ratio, pruned_pct)))
# outermost loop over those conditions for condition in conditions: print( f'On condition {conditions.index(condition)+1} of {len(conditions)}: ' + f'{condition[0]} inputs and {condition[1]} outputs') # next, loop over number of times to prune with each condition for rep in range(int(reps)): if rep % 10 == 0: print(f'On prune {rep} of {reps}') # work with a copy of the model so it remains untouched for the next # iteration of the loop full_model = universal_model.copy() # randomly choose the appropriate number of input and output mets bm_rxn = scn.choose_bm_mets(condition[1], full_model) scn.choose_inputs(condition[0], full_model, bm_rxn) full_model.objective = bm_rxn # see if there's a feasible solution on the full model solution = full_model.optimize() # can't just check solution.status because sometimes it's feasible but the # flux through the biomass reaction is vanishingly small bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) while solution.status == 'infeasible' or bm_rxn_flux < 10e-10: # if the solution isn't feasible, pick a different environment in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in full_model.boundary if rxn.id.startswith('->') ] full_model.remove_reactions(in_rxns) scn.choose_inputs(condition[0], full_model, bm_rxn)
def prune_many_times(arglist):
    '''
    Given (packed into one sequence so this can be called through mp.map()):
    - COBRApy model representing a full/complete/un-pruned string chemistry
    - A number of nutrient sources
    - A number of biomass precursors
    - A number of different sets of nutrient sources
    - A number of times to change stoichiometric coefficients in the biomass
      reaction
    Do:
    - Create a biomass reaction with the designated number of reactants
    - Create the designated number of variants on that reaction with
      randomized stoichiometric coefficients (each reactant's coefficient is
      assigned to a random integer between 1 and 10)
    - Choose the designated number of sets of the designated number of
      nutrient sources for each variant
    - Prune the network once for each combination of biomass reaction and set
      of nutrients
    Return a DataFrame with one row per pruned network and the columns:
    - biomass: '-'-joined IDs of the biomass precursors
    - env: '-'-joined IDs of the metabolites in the input reactions
    - rxn_incl: the value scn.make_rxn_incl returned for the pruned network
    - growth: flux through the biomass reaction on the pruned network
    The frame has zero rows (but the same columns) if either count is 0
    '''
    # probably a more elegant way to do this but I'm currently new to mp.map()
    (full_model, ins, outs, envs, combos) = arglist
    # add a biomass reaction but remove it from the model immediately; it is
    # only needed as the source of the list of biomass precursors
    bm_rxn = scn.choose_bm_mets(outs, full_model)
    full_model.remove_reactions([bm_rxn])
    # every coefficient variant uses the same precursors, so the label for the
    # biomass column only has to be built once
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    # rows are collected across ALL coefficient sets; previously each set
    # overwrote the data from the one before it, so only a single set's
    # results ever reached the caller
    rows = []
    # loop over variants of the biomass reaction with different coefficients
    # but identical reactants
    for combo in range(combos):
        if (combo + 1) % 10 == 0:
            print(f'On coefficient set {combo+1} of {combos}')
        # make a new biomass reaction
        new_bm = cobra.Reaction('varied_bm_rxn')
        new_bm.add_metabolites(
            {m: -random.randint(1, 10) for m in bm_rxn.metabolites})
        # make a copy of the model before adding the new biomass reaction
        model = full_model.copy()
        # add_reactions replaces the deprecated single-reaction add_reaction
        model.add_reactions([new_bm])
        model.objective = new_bm
        # number of environments that have worked for this coefficient set;
        # a while loop (not a for loop) so failed environments can be redrawn
        found = 0
        while found < envs:
            # remove existing input reactions
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            model.remove_reactions(in_rxns)
            # choose new input reactions
            scn.choose_inputs(ins, model, new_bm)
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            # see if this choice of metabolites can produce the biomass on
            # this network
            solution = model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=new_bm.id)
            if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
                # this environment doesn't count; draw another one
                continue
            found += 1
            if found % 100 == 0:
                print(f'On environment {found}')
            # record the metabolites that worked
            env_label = '-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites])
            # prune the network
            pruned_net = scn.min_flux_prune(model, new_bm)
            rxn_incl = scn.make_rxn_incl(model, pruned_net)
            # get the growth rate on the pruned network
            pruned_growth = pruned_net.optimize().fluxes.get(key=new_bm.id)
            rows.append((biomass_label, env_label, rxn_incl, pruned_growth))
    # naming the columns in the constructor keeps this working when no rows
    # were collected (assigning .columns to an empty frame raises ValueError)
    return pd.DataFrame(
        rows, columns=['biomass', 'env', 'rxn_incl', 'growth'])
break # loop over different biomass reactions # list of lists to store output growth_lists = list() i = 0 while i < int(bm_count): i += 1 print(f'On biomass reaction {i}') # work with a copy of the model so it remains untouched for the next # iteration of the loop universal_model = cobra_model.copy() # find an environment that supports growth with this environment so we can # prune bm_rxn = scn.choose_bm_mets(int(outs), universal_model) scn.choose_inputs(int(ins), universal_model, bm_rxn) universal_model.objective = bm_rxn solution = universal_model.optimize() # can't just check solution.status because sometimes it's feasible but the # flux through the biomass reaction is vanishingly small bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id) while solution.status == 'infeasible' or bm_rxn_flux < 10e-10: # if the solution isn't feasible, pick a different environment in_rxns = [ # don't want to remove all boundary reactions because that would # also remove all of the export reactions rxn for rxn in universal_model.boundary if rxn.id.startswith('->') ] universal_model.remove_reactions(in_rxns) scn.choose_inputs(int(ins), universal_model, bm_rxn) solution = universal_model.optimize()
# make a string chemistry network of this size SCN = scn.CreateNetwork(monos, max_len) cobra_model = scn.make_cobra_model(SCN.met_list, SCN.rxn_list, allow_export=False) # now do FBA reps times, with a different set of input and output # metabolites each time rep = 0 while (rep < reps): rep += 1 if rep % 10 == 0: print(f'On rep {rep} of {reps}') # make a copy to add the new reactions to new_model = cobra_model.copy() bm_rxn = scn.choose_bm_mets(outs, new_model) scn.choose_inputs(ins, new_model, bm_rxn) new_model.objective = bm_rxn # optimize and count number of reactions with flux solution = new_model.optimize() # make sure there's flux through the objective before saving info if solution.fluxes.get(key=bm_rxn.id) < 10e-10: rep -= 1 else: nonzero_fluxes = len(solution.fluxes[solution.fluxes != 0]) # divide by number of reactions in full network nonzero_ratio = nonzero_fluxes / len(cobra_model.reactions) # add this number to the dataframe new_row = { 'monos': monos_count, 'max_len': max_len, 'nonzero_ratio': nonzero_ratio
def prune_many_times(arglist):
    '''
    Given (packed into one sequence so this can be called through mp.map()):
    - COBRApy model to prune (it is copied, not modified)
    - A number of nutrient sources
    - A number of biomass precursors
    - A number of different sets of nutrient sources
    Do:
    - Add one biomass reaction to a copy of the model
    - Draw sets of nutrient sources until the designated number of them have
      supported flux through the biomass reaction
    - Prune the network with scn.bm_impact_prune once for each of those sets
    Return a DataFrame with one row per pruned network and the columns:
    - biomass: '-'-joined IDs of the biomass precursors
    - env: '-'-joined IDs of the metabolites in the input reactions
    - rxn_incl: the value scn.make_rxn_incl returned for the pruned network
    - growth: flux through the biomass reaction on the pruned network
    The frame has zero rows (but the same columns) if no environments were
    requested
    '''
    # probably a more elegant way to do this but I'm currently new to mp.map()
    full_model, ins, outs, envs = arglist
    # start by making a copy of the original model so we don't have to remove
    # the biomass reaction each time
    model = full_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(outs, model)
    model.objective = bm_rxn
    # the biomass precursors are the same for every row
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    rows = []
    # number of environments that have worked so far; a while loop (not a for
    # loop) so failed environments can be redrawn
    found = 0
    while found < envs:
        # remove existing input reactions
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        model.remove_reactions(in_rxns)
        # choose new input reactions
        scn.choose_inputs(ins, model, bm_rxn)
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        # see if this choice of metabolites can produce the biomass on this
        # network
        solution = model.optimize()
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
            # this environment doesn't count; draw another one
            continue
        found += 1
        if found % 100 == 0:
            print(f'On environment {found}')
        # record the metabolites that worked
        env_label = '-'.join(
            [met.id for rxn in in_rxns for met in rxn.metabolites])
        # prune the network
        pruned_net = scn.bm_impact_prune(model, bm_rxn)
        rxn_incl = scn.make_rxn_incl(model, pruned_net)
        # get the growth rate on the pruned network
        pruned_growth = pruned_net.optimize().fluxes.get(key=bm_rxn.id)
        rows.append((biomass_label, env_label, rxn_incl, pruned_growth))
    # naming the columns in the constructor keeps this working when no rows
    # were collected (assigning .columns to an empty frame raises ValueError)
    return pd.DataFrame(
        rows, columns=['biomass', 'env', 'rxn_incl', 'growth'])
# keep lists of the environments used and the reaction-inclusion vectors of # the pruned networks food_mets = list() rxn_incl_vecs = list() # use a while loop and not a for loop so we can go back on occasion i = 0 # counter for how many times it had to reselct the environment to get a # feasible solution with the full network j = 0 while i < int(envs): i += 1 # remove existing input reactions in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')] model.remove_reactions(in_rxns) # choose new input reactions scn.choose_inputs(int(ins), model, bm_rxn) in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')] foods_string = ' '.join([ # getting the metabolite IDs out of a reaction is annoying list(rxn.metabolites.keys())[0].id for rxn in in_rxns ]) # see if this choice of metabolites can produce the biomass on this network solution = model.optimize() bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) if solution.status == 'infeasible' or bm_rxn_flux < 1e-10: # redo this iteration of the loop i -= 1 # increment the counter of redos j += 1 continue # record these metabolites and the reactions that had flux
def _jaccard_index(first, second):
    ''' Jaccard index of two reaction lists, counting overlap as the number
    of items of first that are found in second '''
    overlap = sum(1 for rxn in first if rxn in second)
    union = len(first) + len(second) - overlap
    # two empty lists are treated as identical instead of dividing by zero
    if union == 0:
        return 1.0
    return overlap / union


def compare_nets(universal_network, ins, outs):
    '''
    Given a network without any input reactions or a biomass reaction, add
    those and prune it using both the minimum flux pruner and the biomass
    sensitive pruner and record information about how the two pruning
    processes occurred

    Arguments:
    - universal_network: network to copy; the original is not modified here
    - ins: number of input metabolites handed to scn.choose_inputs
    - outs: number of biomass metabolites handed to scn.choose_bm_mets

    Returns a DataFrame with the columns step, type ('min' or 'bm'),
    rxn_count and jaccard. Steps that both pruners reached get one row per
    pruner; steps only the longer-running pruner reached get a single row
    whose Jaccard index is computed against the last step of the other pruner
    '''
    # copy the universal network so we have an unmodified version around
    net = universal_network.copy()
    # choose new input and biomass metabolites for the universal network
    bm_rxn = scn.choose_bm_mets(outs, net)
    net.objective = bm_rxn
    scn.choose_inputs(ins, net, bm_rxn)
    # prune using both approaches; both are indexed and measured with len()
    # below, so each is expected to be a list with one reaction list per step
    # NOTE(review): both pruners get the same net object; presumably neither
    # one modifies it in place, confirm against the pruner implementations
    bm_pruned_rxns = bm_impact_prune(net, bm_rxn)
    min_pruned_rxns = min_flux_prune(net, bm_rxn)
    # make a dictionary to store information about how this network is pruned
    # by the two algorithms
    info_dict = {
        'step': list(),
        'type': list(),
        'rxn_count': list(),
        'jaccard': list()
    }
    # compare the two algorithms step-by-step for as long as both have steps
    paired_steps = zip(min_pruned_rxns, bm_pruned_rxns)
    for step, (min_step, bm_step) in enumerate(paired_steps):
        jaccard = _jaccard_index(min_step, bm_step)
        # add two "rows" to info_dict, one for each model, because that makes
        # plotting easier later; step and jaccard are the same in both rows
        for kind, count in (('min', len(min_step)), ('bm', len(bm_step))):
            info_dict['step'].append(step)
            info_dict['type'].append(kind)
            info_dict['rxn_count'].append(count)
            info_dict['jaccard'].append(jaccard)
    # there's no guarantee that both pruning algorithms took the same number
    # of steps to finish; zip() stopped at the end of the shorter list, so the
    # extra steps of the longer one still need to be recorded
    if len(bm_pruned_rxns) > len(min_pruned_rxns):
        longer_list, longer_type = bm_pruned_rxns, 'bm'
        shorter_list = min_pruned_rxns
    else:
        longer_list, longer_type = min_pruned_rxns, 'min'
        shorter_list = bm_pruned_rxns
    # the first step that had no partner sits at index len(shorter_list);
    # slicing from one past that index (as this code used to) silently dropped
    # that step and numbered every later step one too low
    first_extra = len(shorter_list)
    # compare every extra step with the final state of the shorter algorithm
    # (an empty list if that algorithm produced no steps at all)
    final_short = shorter_list[-1] if shorter_list else []
    extra_steps = enumerate(longer_list[first_extra:], start=first_extra)
    for step, rxns in extra_steps:
        # only need to add info for longer_list
        info_dict['step'].append(step)
        info_dict['type'].append(longer_type)
        info_dict['rxn_count'].append(len(rxns))
        info_dict['jaccard'].append(_jaccard_index(rxns, final_short))
    # now that that's taken care of, turn this dict into a pandas dataframe
    # and return it
    return pd.DataFrame(info_dict)
return (count) # get command-line arguments try: (monos, max_pol, ins, outs, bms) = sys.argv[1:] except ValueError: sys.exit( 'Arguments:\nmonomers\nmax polymer length\nnumber of food ' + 'sources in environment\nnumber of biomass precursors\nnumber of ' + 'different biomass reactions to use') # create the reference network and pick an environment SCN = scn.CreateNetwork(monos, int(max_pol)) cobra_model = scn.make_cobra_model(SCN.met_list, SCN.rxn_list) scn.choose_inputs(int(ins), cobra_model) # reaction bitstrings as keys, biomass components as values pruned_nets = dict() i = 0 while i < int(bms): # pick a new biomass reaction and set it as the objective bm_rxn = scn.choose_bm_mets(int(outs), cobra_model) cobra_model.objective = bm_rxn # see if these biomass precursors can be produced on this environment # before we bother pruning solution = cobra_model.optimize() bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id) if solution.status != 'infeasible' and bm_rxn_flux > 10e-10: # only increment the counter if we've chosen a usable biomass reaction i += 1
export_model = scn.make_cobra_model(SCN.met_list, SCN.rxn_list, allow_export=True) no_export_model = scn.make_cobra_model(SCN.met_list, SCN.rxn_list, allow_export=False) # give both networks the same biomass reaction as the objective exp_bm_rxn = scn.choose_bm_mets(int(outs), export_model) # have to make a copy of the reaction object or shit gets weird no_exp_bm_rxn = exp_bm_rxn.copy() no_export_model.add_reaction(no_exp_bm_rxn) export_model.objective = exp_bm_rxn no_export_model.objective = no_exp_bm_rxn # make import reactions on no_export model first so we can just get them from # the model's boundary for the export model scn.choose_inputs(int(ins), no_export_model, no_exp_bm_rxn) export_model.add_reactions([rxn.copy() for rxn in no_export_model.boundary]) # make sure that there's at least one feasible solution for both networks # before trying to prune either export_solution = export_model.optimize() no_export_solution = no_export_model.optimize() while export_solution.status == 'infeasible' or \ (export_solution.fluxes == 0).all() or \ no_export_solution.status == 'infeasible' or \ (no_export_solution.fluxes == 0).all(): # remove biomass reaction from both networks export_model.remove_reactions([exp_bm_rxn]) no_export_model.remove_reactions([no_exp_bm_rxn]) # remove input reactions from both networks no_export_model.remove_reactions([rxn for rxn in no_export_model.boundary])
import sys
import string_chem_net as scn

# the four command-line arguments govern the size of the network and how
# many food sources and biomass precursors get chosen
try:
    monos, max_pol, ins, outs = sys.argv[1:]
except ValueError:
    sys.exit('Arguments:\nmonomers\nmax polymer length\nnumber of food ' +
             'sources\nnumber of biomass precursors')

SCN = scn.CreateNetwork(monos, int(max_pol))
cobra_model = scn.make_cobra_model(
    SCN.met_list, SCN.rxn_list, allow_export=True)

# keep choosing a biomass reaction and an environment until the combination
# supports growth
while True:
    bm_rxn = scn.choose_bm_mets(int(outs), cobra_model)
    scn.choose_inputs(int(ins), cobra_model, bm_rxn)
    cobra_model.objective = bm_rxn
    solution = cobra_model.optimize()
    # done as soon as the model is solvable and not every flux is zero
    if not (solution.status == 'infeasible' or (solution.fluxes == 0).all()):
        break
    # otherwise remove this biomass reaction and the input reactions (IDs
    # starting with '->') before trying again
    cobra_model.remove_reactions([bm_rxn])
    in_rxns = [rxn for rxn in cobra_model.boundary if rxn.id.startswith('->')]
    cobra_model.remove_reactions(in_rxns)