Пример #1
0
def do_many_rand_prunes(full_model, bm_rxn, reps):
    """Randomly prune ``full_model`` ``reps`` times and tally the outcomes.

    Every repetition calls ``scn.random_prune(full_model, bm_rxn)`` and
    fingerprints the result with ``scn.make_rxn_incl(full_model, pruned)``.
    The fingerprint (a reaction-inclusion vector) is used as a dictionary key
    because pruned model objects themselves never compare equal.

    Returns a 3-tuple:
    - list with the number of reactions of the pruned network from every
      repetition (duplicates included), in run order
    - dict mapping each distinct reaction-inclusion vector to the number of
      times it was produced
    - list with the first pruned network seen for each distinct vector
    """
    sizes_per_run = []
    times_seen = {}
    distinct_nets = []
    for rep in range(1, reps + 1):
        # progress report every hundredth repetition
        if not rep % 100:
            print(f'On random prune {rep}.')
        candidate = scn.random_prune(full_model, bm_rxn)
        fingerprint = scn.make_rxn_incl(full_model, candidate)
        if fingerprint in times_seen:
            # a network we have met before: only bump its counter
            times_seen[fingerprint] += 1
        else:
            # first sighting: start its counter and keep the network itself
            times_seen[fingerprint] = 1
            distinct_nets.append(candidate)
        # the size is logged on every run, seen before or not, so the
        # distribution of network sizes can be examined afterwards
        sizes_per_run.append(len(candidate.reactions))
    return (sizes_per_run, times_seen, distinct_nets)
Пример #2
0
def prune_many_times(arglist):
    '''
    Given (packed into the single tuple `arglist`, in this order):
    - COBRApy model representing a full/complete/un-pruned string chemistry
    - A number of nutrient sources
    - A number of biomass precursors
    - A number of different sets of nutrient sources
    - A number of times to change stoichiometric coefficients in the biomass
      reaction
    Do:
    - Create a biomass reaction with the designated number of reactants
    - Create the designated number of variants on that reaction with randomized
      stoichiometric coefficients (each reactant's coefficient is assigned to
      a random integer between 1 and 10)
    - Choose the designated number of sets of the designated number of nutrient
      sources
    - Prune the network once for each combination of biomass reaction and
      set of nutrients
    Return a DataFrame with one row per (coefficient set, environment)
    combination that supported growth, with the columns:
    - 'biomass': '-'-joined IDs of the biomass precursors (the same string in
      every row, since all variants share their reactants)
    - 'env': '-'-joined IDs of the metabolites of the input reactions used
    - 'rxn_incl': reaction-inclusion vector of the pruned network
    - 'growth': flux through the biomass reaction on the pruned network
    The frame has these four columns even when it has no rows.

    Side effect: the biomass reaction chosen by scn.choose_bm_mets is added to
    and then removed from the model passed in.
    '''
    # the arguments travel as one tuple so the function can be used with a
    # single-argument mapper such as mp.map()
    (full_model, ins, outs, envs, combos) = arglist
    # add a biomass reaction but remove it from the model immediately; only
    # its list of reactants is needed as a template
    bm_rxn = scn.choose_bm_mets(outs, full_model)
    full_model.remove_reactions([bm_rxn])
    # keep lists of the environments used, the reaction-inclusion vectors of
    # the pruned networks and the growth rates on the pruned networks.
    # These are created ONCE, before the loop over coefficient sets, so that
    # the results of every set end up in the returned DataFrame (creating
    # them inside the loop kept only the final set and left them undefined
    # when combos was 0)
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []
    # loop over variants of the biomass reaction with different coefficients
    # but identical reactants
    for combo in range(combos):
        if (combo + 1) % 10 == 0:
            print(f'On coefficient set {combo+1} of {combos}')
        # make a new biomass reaction that consumes every precursor with a
        # random integer coefficient
        new_bm = cobra.Reaction('varied_bm_rxn')
        new_bm.add_metabolites(
            {m: -random.randint(1, 10)
             for m in bm_rxn.metabolites})
        # make a copy of the model before adding the new biomass reaction
        model = full_model.copy()
        model.add_reaction(new_bm)
        model.objective = new_bm
        # i counts the environments that worked; a while loop is used so an
        # environment that cannot support growth is simply re-drawn.
        # NOTE(review): there is no retry limit, so this never terminates if
        # no choice of inputs can produce biomass
        i = 0
        while i < envs:
            # remove existing input reactions
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            model.remove_reactions(in_rxns)
            # choose new input reactions
            scn.choose_inputs(ins, model, new_bm)
            in_rxns = [
                rxn for rxn in model.boundary if rxn.id.startswith('->')
            ]
            # see if this choice of metabolites can produce the biomass on
            # this network
            solution = model.optimize()
            bm_rxn_flux = solution.fluxes.get(key=new_bm.id)
            if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
                # unusable environment: draw another one without counting it
                continue
            i += 1
            if i % 100 == 0:
                print(f'On environment {i}')
            # record the metabolites that worked
            food_mets.append('-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites]))
            # prune the network
            pruned_net = scn.min_flux_prune(model, new_bm)
            rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
            # get the growth rate on the pruned network
            solution = pruned_net.optimize()
            pruned_growths.append(solution.fluxes.get(key=new_bm.id))

    # make a dataframe out of the lists; passing the column names to the
    # constructor keeps this working when no rows were collected
    data = pd.DataFrame(
        list(zip(food_mets, rxn_incl_vecs, pruned_growths)),
        columns=['env', 'rxn_incl', 'growth'])
    # add a column with the biomass components
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    data['biomass'] = [biomass_label] * len(food_mets)
    # reorder columns
    data = data[['biomass', 'env', 'rxn_incl', 'growth']]
    return data
Пример #3
0
     bm_rxn_flux = solution.fluxes.get(key = bm_rxn.id)
 # now that we know there's at least one environment that supports growth
 # with this biomass reaction, we can prune the universal network
 pruned_model = scn.min_flux_prune(universal_model, bm_rxn)
 # find growth in every environment
 for env in envs:
     # start by removing existing input reactions
     in_rxns = [
         # don't want to remove all boundary reactions because that would
         # also remove all of the export reactions
         rxn for rxn in pruned_model.boundary if rxn.id.startswith('->')
     ]
     pruned_model.remove_reactions(in_rxns)
     # while we have the pruned network with no input reactions, make the
     # reaction-inclusion vector
     bitstring = scn.make_rxn_incl(universal_model, pruned_model)
     # create new input reactions
     for met in env:
         in_rxn = cobra.Reaction(
             '->' + met.id,
             upper_bound = 1.0, # only allow importing of this metabolite
             lower_bound = 0.0
         )
         in_rxn.add_metabolites({met: 1.0})
         pruned_model.add_reaction(in_rxn)
     # do FBA to find growth in this environment
     solution = pruned_model.optimize()
     # prepare output
     growth = str(solution.fluxes.get(key = bm_rxn.id))
     env_string = ','.join([met.id for met in env])
     growth_lists.append([bm_rxn.id, env_string, growth, bitstring])
Пример #4
0
def prune_many_times(arglist):
    '''
    Prune one network under many randomly chosen sets of input reactions.

    `arglist` is the tuple (full_model, ins, outs, envs), packed so the
    function can be used with a single-argument mapper such as mp.map():
    - full_model: COBRApy model; it is copied, the original is not modified
    - ins: forwarded to scn.choose_inputs (presumably the number of nutrient
      sources per environment -- confirm against scn)
    - outs: forwarded to scn.choose_bm_mets (presumably the number of biomass
      precursors -- confirm against scn)
    - envs: number of growth-supporting environments to collect

    For each environment the input reactions (boundary reactions whose ID
    starts with '->') are replaced, the model is optimized, and, if the
    biomass flux is at least 1e-10, the model is pruned with
    scn.bm_impact_prune. Environments that do not support growth are re-drawn
    and do not count towards `envs`.

    Returns a DataFrame with one row per accepted environment and the columns:
    - 'biomass': '-'-joined IDs of the biomass reaction's metabolites
    - 'env': '-'-joined IDs of the metabolites of the input reactions
    - 'rxn_incl': reaction-inclusion vector of the pruned network
    - 'growth': flux through the biomass reaction on the pruned network
    The frame has these four columns even when it has no rows.
    '''
    full_model, ins, outs, envs = arglist
    # start by making a copy of the original model so we don't have to remove
    # the biomass reaction each time
    model = full_model.copy()
    # add a biomass reaction and set it as the objective
    bm_rxn = scn.choose_bm_mets(outs, model)
    model.objective = bm_rxn
    # keep lists of the environments used, the reaction-inclusion vectors of
    # the pruned networks and the growth rates on the pruned networks
    food_mets = []
    rxn_incl_vecs = []
    pruned_growths = []
    # i counts the environments that worked; a while loop is used so an
    # environment that cannot support growth is simply re-drawn.
    # NOTE(review): there is no retry limit, so this never terminates if no
    # choice of inputs can produce biomass
    i = 0
    while i < envs:
        # remove existing input reactions
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        model.remove_reactions(in_rxns)
        # choose new input reactions
        scn.choose_inputs(ins, model, bm_rxn)
        in_rxns = [rxn for rxn in model.boundary if rxn.id.startswith('->')]
        # see if this choice of metabolites can produce the biomass on this
        # network
        solution = model.optimize()
        bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
        if solution.status == 'infeasible' or bm_rxn_flux < 1e-10:
            # unusable environment: draw another one without counting it
            continue
        i += 1
        if i % 100 == 0:
            print(f'On environment {i}')
        # record the metabolites that worked
        food_mets.append('-'.join(
            [met.id for rxn in in_rxns for met in rxn.metabolites]))
        # prune the network
        pruned_net = scn.bm_impact_prune(model, bm_rxn)
        rxn_incl_vecs.append(scn.make_rxn_incl(model, pruned_net))
        # get the growth rate on the pruned network
        solution = pruned_net.optimize()
        pruned_growths.append(solution.fluxes.get(key=bm_rxn.id))

    # make a dataframe out of the lists; passing the column names to the
    # constructor keeps this working when envs is 0 and no rows exist
    data = pd.DataFrame(
        list(zip(food_mets, rxn_incl_vecs, pruned_growths)),
        columns=['env', 'rxn_incl', 'growth'])
    # add a column with the biomass components
    biomass_label = '-'.join([met.id for met in bm_rxn.metabolites])
    data['biomass'] = [biomass_label] * len(food_mets)
    # reorder columns
    data = data[['biomass', 'env', 'rxn_incl', 'growth']]
    return data
Пример #5
0
            j += 1
            continue
        # record these metabolites and the reactions that had flux
        else:
            if i % 100 == 0:
                print(f'On environment {i}')
            # reset the reselection counter
            j = 0
            # get the list of food source metabolites
            food_mets.append('-'.join(
                [met.id for rxn in in_rxns for met in rxn.metabolites]))
            # remove all reactions without flux
            no_flux_rxns = solution.fluxes[solution.fluxes == 0].index
            flux_only = model.copy()
            flux_only.remove_reactions(no_flux_rxns)
            rxn_incl = scn.make_rxn_incl(model, flux_only)
            rxn_incl_vecs.append(rxn_incl)

    # make a dataframe out of the two lists and add it to the larger dataframe
    more_data = pd.DataFrame(list(zip(food_mets, rxn_incl_vecs)))
    more_data.columns = ['env', 'rxn_incl']
    # add a column with the biomass components
    more_data['biomass'] = list(
        it.repeat('-'.join([met.id for met in bm_rxn.metabolites]),
                  len(food_mets)))
    all_data = all_data.append(more_data)

# write the accumulated results to a CSV whose name records the parameters
# of this run
all_data.to_csv(
    f'data/multiple_env_fba_{monos}_{max_pol}_{ins}ins_{envs}envs_'
    f'{outs}outs_{orgs}orgs_{export}exp.csv'
)
    # see if these biomass precursors can be produced on this environment
    # before we bother pruning
    solution = cobra_model.optimize()
    bm_rxn_flux = solution.fluxes.get(key=bm_rxn.id)
    if solution.status != 'infeasible' and bm_rxn_flux > 10e-10:
        # only increment the counter if we've chosen a usable biomass reaction
        i += 1
        print(f'On biomass reaction {i}')
        # run the minimum flux pruner
        pruned_net = scn.min_flux_prune(cobra_model, bm_rxn)
        # remove the biomass reaction before making the reaction bitstring
        # can't just remove the reaction because somehow in the pruning process
        # the biomass reaction became different in some subtle way
        pruned_bm_rxn = pruned_net.reactions.get_by_id(bm_rxn.id)
        pruned_net.remove_reactions([pruned_bm_rxn])
        bitstring = scn.make_rxn_incl(cobra_model, pruned_net)
        pruned_nets[bitstring] = [met.id for met in bm_rxn.metabolites]
    # remove this biomass reaction from the full network regardless of whether
    # it worked or not
    cobra_model.remove_reactions([bm_rxn])

# write one tab-separated row per distinct pruned network: its reaction
# bitstring, the reaction count obtained from count_bitstring, and the
# comma-separated biomass precursors stored for it in pruned_nets
with open(
        f'data/multiple_bm_min_prune_{monos}_{max_pol}_{ins}ins_'
        f'{outs}outs_{bms}_bms.tsv', 'w') as out:
    # header row
    out.write('bitstring\trxn_count\tbiomass\n')
    for network, precursors in pruned_nets.items():
        rxn_count = count_bitstring(network)
        out.write(
            '\t'.join([network, str(rxn_count), ','.join(precursors)]) + '\n')
Пример #7
0
    export_model.objective = exp_bm_rxn
    no_export_model.objective = no_exp_bm_rxn
    scn.choose_inputs(int(ins), no_export_model, no_exp_bm_rxn)
    export_model.add_reactions(
        [rxn.copy() for rxn in no_export_model.boundary])
    # see if this combination works for both networks
    export_solution = export_model.optimize()
    no_export_solution = no_export_model.optimize()

# Prune both networks (the one with and the one without export reactions)
# with the minimum-flux pruner, then record for each pruned network its
# number of reactions and its reaction-inclusion vector relative to the
# un-pruned model it came from.
print('Using minimum-flux pruner.')
min_pruned_export = scn.min_flux_prune(export_model, exp_bm_rxn)
min_pruned_no_export = scn.min_flux_prune(no_export_model, no_exp_bm_rxn)
min_export_count = len(min_pruned_export.reactions)
min_no_export_count = len(min_pruned_no_export.reactions)
min_export_rxn_incl = scn.make_rxn_incl(export_model, min_pruned_export)
min_no_export_rxn_incl = scn.make_rxn_incl(no_export_model,
                                           min_pruned_no_export)

# Randomly prune each network int(reps) times. do_many_rand_prunes returns,
# in this order: the reaction count of every pruned network, a dict counting
# how often each distinct reaction-inclusion vector came up, and the list of
# distinct pruned networks.
print('Randomly pruning with export reactions.')
(rand_export_pruned_rxn_counts, rand_export_prune_counts,
 rand_export_pruned_nets) = do_many_rand_prunes(export_model, exp_bm_rxn,
                                                int(reps))

# same procedure for the network without export reactions
print('Randomly pruning without export reactions.')
(rand_no_export_pruned_rxn_counts, rand_no_export_prune_counts,
 rand_no_export_pruned_nets) = do_many_rand_prunes(no_export_model,
                                                   no_exp_bm_rxn, int(reps))

print('Preparing text output.')