def make_diet_from_models(model_file_names, diet_file_name, bound=None):
    """ Make a diet file from the exchange reactions in a list of models.

    Every reaction whose ID starts with 'EX_' contributes one entry to the
    diet. When the same reaction ID is found in more than one model, the
    entry created for the first model in the list is kept.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files
    diet_file_name : str
        Path to file to store diet conditions in JSON format
    bound : float, optional
        Bound to set on every exchange reaction (the absolute value is used),
        when None use bound from first model that contains exchange reaction
    """

    def get_active_bound(reaction):
        """ For an active boundary reaction, return the relevant bound. """
        if reaction.reactants:
            # Sign of the lower bound is flipped so the stored value is the
            # negated lower bound.
            return -reaction.lower_bound
        elif reaction.products:
            return reaction.upper_bound
        # A reaction with neither reactants nor products has no relevant bound.
        return None

    if bound is not None:
        bound = float(abs(bound))

    diet = dict()
    for name in model_file_names:
        model = load_model_from_file(name)
        exchange_reactions = model.reactions.query(
            lambda x: x.startswith('EX_'), 'id')
        for rxn in exchange_reactions:
            # First model that contains the reaction decides the value.
            if rxn.id not in diet:
                if bound is None:
                    diet[rxn.id] = get_active_bound(rxn)
                else:
                    diet[rxn.id] = bound

    # Use a context manager so the file is flushed and closed deterministically.
    with open(diet_file_name, 'w') as handle:
        json.dump(diet, handle, indent=4)
    return
def get_exchange_reaction_ids(model_file_names):
    """ Collect the unique exchange reaction IDs and the model IDs from a list of models.

    A reaction counts as an exchange reaction when its ID starts with 'EX_'.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files

    Returns
    -------
    set
        Set of exchange reaction IDs found in the input models
    list
        List of model IDs, in the same order as the input models
    """

    exchange_ids = set()
    loaded_model_ids = []
    for file_name in model_file_names:
        current = load_model_from_file(file_name)
        loaded_model_ids.append(current.id)
        matches = current.reactions.query(lambda x: x.startswith('EX_'), 'id')
        exchange_ids.update(reaction.id for reaction in matches)
    return exchange_ids, loaded_model_ids
def check_for_growth(model_file_name):
    """ Optimize a model and report whether it grows under the conditions set in the model.

    Parameters
    ----------
    model_file_name : str
        Path to input model file

    Returns
    -------
    dict
        Dictionary with key 'grows' (bool) and key 'message' (None when the
        model grows, otherwise a string describing the failure)
    """

    model = load_model_from_file(model_file_name)
    solution = model.optimize()

    if solution.status != 'optimal':
        # The solver did not reach an optimal solution.
        grows = False
        message = 'Model {0} in file {1} fails to optimize'.format(
            model.id, model_file_name)
    elif solution.objective_value <= 0.0001:
        # An objective value at or below the threshold is treated as no growth.
        grows = False
        message = 'Model {0} in file {1} does not produce growth under given conditions' \
            .format(model.id, model_file_name)
    else:
        grows = True
        message = None

    return {'grows': grows, 'message': message}
def optimize_for_species(model_file_name, species_id, medium, time_point_folder):
    """ Knock out the other species from a two species model, optimize, and save results.

    Parameters
    ----------
    model_file_name : str
        Path to two species community model file
    species_id : str
        ID of the species to keep; the other member of the pair is knocked out
    medium : dict
        Medium handed to apply_medium() (presumably exchange reaction ID as key
        and bound as value -- confirm against apply_medium)
    time_point_folder : str
        Path to folder where the modified model and the fluxes are written

    Returns
    -------
    Solution returned by optimizing the pair model with one species knocked out

    Raises
    ------
    Exception
        When species_id is not listed in the 'species' notes of the model
    """

    LOGGER.info('Loading model {0} to optimize for {1}'.format(
        model_file_name, species_id))
    pair_model = load_model_from_file(model_file_name)
    members = pair_model.notes['species']

    # Locate the requested species; when the ID shows up more than once the
    # last matching position is used.
    species_index = -1
    for position, member in enumerate(members):
        if member['id'] == species_id:
            species_index = position
    if species_index < 0:
        raise Exception(
            'Species {0} is not a member of the community'.format(species_id))

    # The species to knock out is the other one of the first two members.
    knockout_index = 1 if species_index == 0 else 0
    knockout_id = members[knockout_index]['id']
    LOGGER.info('Going to knock out {0} from index {1}'.format(
        knockout_id, knockout_index))

    with pair_model:
        # Apply the medium.
        apply_medium(pair_model, medium)

        # Knock out every reaction whose ID starts with the ID of the
        # species being removed.
        for reaction in pair_model.reactions.query(
                lambda r: r.startswith(knockout_id), 'id'):
            reaction.knock_out()

        # Drop the objective of the removed species from the community objective.
        knockout_objective = pair_model.reactions.get_by_id(
            members[knockout_index]['objective'])
        coefficients = linear_reaction_coefficients(pair_model)
        del coefficients[knockout_objective]
        set_objective(pair_model, coefficients)
        model_path = join(time_point_folder, pair_model.id + '.json')
        save_model_to_file(pair_model, model_path)

        # Optimize while the knockouts are in place and store the fluxes.
        solution = pair_model.optimize()
        fluxes_path = join(time_point_folder, pair_model.id + '-solution.json')
        solution.fluxes.to_json(fluxes_path)

    return solution
def set_model_id_prefix(model_file_names, prefix='M'):
    """ Put a prefix on the model ID of every model in a list of models.

    Model IDs must start with an alphabetic character so they are interpreted
    as strings in data frames. Models created by ModelSEED typically use a
    PATRIC genome ID as the model ID which is a number.

    Each model is loaded, renamed, and written back to the same file.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files
    prefix : str, optional
        String to use as prefix for model IDs
    """

    for file_name in model_file_names:
        renamed = load_model_from_file(file_name)
        renamed.id = prefix + renamed.id
        # Overwrite the original file with the renamed model.
        save_model_to_file(renamed, file_name)
def optimize_single_model(model_file_name, medium):
    """ Optimize a single species model on a given medium.

    This function is used as a target function in a multiprocessing pool.
    The model is read from its file on every call instead of being kept in
    memory for the whole simulation. This lowers the memory requirements of
    the simulation and there is no need to revert the model after the
    optimization, at the cost of more accesses of the file system.

    Parameters
    ----------
    model_file_name : str
        Path to single species model file to be optimized
    medium : dict
        Dictionary with exchange reaction ID as key and bound as value

    Returns
    -------
    dict
        Dictionary with keys 'model_id', 'status', 'objective_value', and
        'exchange_fluxes' (non-zero fluxes of reactions whose ID starts with
        'EX_'; empty unless the solution status is 'optimal')
    """

    # Growth rates were confirmed to match the solo runs in the pair model.
    # NOTE(review): is this step only needed for the exchange reaction fluxes
    # that are unavailable from mminte output?
    model = load_model_from_file(model_file_name)
    details = {'model_id': model.id}
    apply_medium(model, medium)
    solution = model.optimize()
    details['status'] = solution.status
    details['objective_value'] = solution.objective_value

    exchange_reactions = model.reactions.query(lambda x: x.startswith('EX_'), 'id')
    if solution.status == 'optimal':
        fluxes = solution.fluxes
        # Keep only the exchange reactions that carry flux.
        details['exchange_fluxes'] = {
            rxn.id: fluxes[rxn.id]
            for rxn in exchange_reactions
            if fluxes[rxn.id] != 0.0
        }
    else:
        details['exchange_fluxes'] = dict()
    return details
def run_simulation(time_interval, single_file_names, pair_file_names, diet_file_name,
                   density_file_name, data_folder, time_step=0.5, k=1, n_processes=None):
    """ Run a simulation over a time interval.

    Parameters
    ----------
    time_interval : range
        Range of time points for running the simulation
    single_file_names : list of str
        List of path names to single species models
    pair_file_names : list of str
        List of path names to two species community model files
    diet_file_name : str
        Path to file with initial diet conditions in JSON format
    density_file_name : str
        Path to file with initial population densities in CSV format
    data_folder : str
        Path to folder for storing data generated at each time point
    time_step : float, optional
        Adjustment to time point where 1 is one hour, 0.5 is 30 minutes, etc.
    k : int, optional
        Maximum size of the population that the environment has the capacity to support
    n_processes: int, optional
        Number of processes in job pool, when None use the number of CPUs
        up to a maximum of 4

    Raises
    ------
    ValueError
        When time_step is not greater than 0 and less than or equal to 1, when
        the initial density file is missing required columns or contains null
        values, or when the model IDs in the initial density file do not match
        the model IDs of the single species models
    """

    # Validate time_step parameter.
    if time_step <= 0.0 or time_step > 1.0:
        raise ValueError(
            'time_step parameter must be a value greater than 0 and less than or equal to 1'
        )

    # Get the initial population density values.
    density = pd.read_csv(density_file_name, dtype={'ID': str, 'DENSITY': float})
    if not set(density_columns).issubset(density.columns):
        raise ValueError(
            'Required columns {0} not available in initial population density file "{1}"'
            .format(density_columns, density_file_name))
    invalid_fields = density.isnull().values.sum()
    if invalid_fields > 0:
        raise ValueError(
            'There are {0} fields with invalid values in initial population density file "{1}"'
            .format(invalid_fields, density_file_name))

    # Get the initial diet conditions.
    with open(diet_file_name) as handle:
        diet = json.load(handle)

    # Set diet for first time step by adding exchange reactions from the single
    # species models that are not in the initial diet. This allows metabolites
    # produced by a species to become available in the diet conditions during the
    # simulation.
    model_exchanges, model_ids = get_exchange_reaction_ids(single_file_names)
    initial_exchanges = set(diet.keys())
    if initial_exchanges > model_exchanges:
        warn(
            'Diet file "{0}" contains more exchange reactions than there are in single species models'
            .format(diet_file_name))
    if model_exchanges.issuperset(initial_exchanges):  # @todo is this necessary?
        for rxn_id in (model_exchanges - initial_exchanges):
            diet[rxn_id] = 0.0
    with open(join(data_folder, 'initial-diet.json'), 'w') as handle:
        json.dump(diet, handle, indent=4)

    # Confirm the model IDs in the initial density file match the model IDs in the
    # list of single species models.
    if density.shape[0] != len(model_ids):
        _log_unknown_density_ids(density, model_ids, density_file_name)
        for model_id in model_ids:
            if model_id not in density.ID.values:
                LOGGER.error(
                    'Model ID "{0}" from list of single species models is not available in '
                    'initial population density file'.format(model_id))
        raise ValueError(
            'Number of species ({0}) in initial density file does not match '
            'number of single species models ({1})'.format(
                density.shape[0], len(model_ids)))
    if density.loc[density['ID'].isin(model_ids)].shape[0] != len(model_ids):
        _log_unknown_density_ids(density, model_ids, density_file_name)
        raise ValueError(
            'One or more model IDs in initial density file do not match '
            'model IDs in single species models')

    # Find all of the pair community models that each single species model is a member of.
    single_to_pairs = defaultdict(list)
    for model_file_name in pair_file_names:
        pair_model = load_model_from_file(model_file_name)
        single_to_pairs[pair_model.notes['species'][0]['id']].append(model_file_name)
        single_to_pairs[pair_model.notes['species'][1]['id']].append(model_file_name)

    # Create a job pool for running optimizations.
    if n_processes is None:
        n_processes = min(cpu_count(), 4)
    pool = Pool(n_processes)

    try:
        # Run the simulation over the specified time interval.
        for time_point in time_interval:
            # Start this time point.
            time_point_id = '{0:04d}'.format(time_point + 1)
            LOGGER.info('[%s] STARTED TIME POINT', time_point_id)
            time_point_folder = join(data_folder, 'timepoint-' + time_point_id)
            if not exists(time_point_folder):
                makedirs(time_point_folder)
            pair_rate_file_name = join(
                time_point_folder, 'pair-rates-{0}.csv'.format(time_point_id))
            effects_matrix_file_name = join(
                time_point_folder, 'effects-matrix-{0}.csv'.format(time_point_id))
            # Separate local name so the density_file_name parameter is not overwritten.
            next_density_file_name = join(
                time_point_folder, 'density-{0}.csv'.format(time_point_id))
            # Format only the file name, so braces in the folder path cannot
            # be mistaken for replacement fields.
            single_rate_file_name = join(
                time_point_folder, 'single-rates-{0}.csv'.format(time_point_id))
            next_diet_file_name = join(
                time_point_folder, 'diet-{0}.json'.format(time_point_id))

            # Calculate the growth rates for each two species model under the
            # current diet conditions.
            growth_rates, alone = calculate_growth_rates(
                pair_file_names, diet, pool, pair_rate_file_name, time_point_id,
                time_point_folder, single_to_pairs)

            # Create the effects matrix.
            effects_matrix = create_effects_matrix(
                growth_rates, effects_matrix_file_name, time_point_id)

            # Run Leslie-Gower algorithm to calculate new population densities.
            density = leslie_gower(effects_matrix, density, next_density_file_name,
                                   time_point_id, alone, k, time_step)

            # Get the exchange reaction fluxes from optimizing single species models.
            exchange_fluxes = get_exchange_fluxes(
                single_file_names, diet, pool, single_rate_file_name, time_point_id)

            # Create diet conditions for next time point.
            diet = create_next_diet(diet, exchange_fluxes, density,
                                    next_diet_file_name, time_step, time_point_id)
    finally:
        # Always release the worker processes, even when a time point fails.
        pool.close()
        pool.join()

    # Store results from last time step in data folder.
    with open(join(data_folder, 'final-diet.json'), 'w') as handle:
        json.dump(diet, handle, indent=4)
    density.to_csv(join(data_folder, 'final-density.csv'))
    return


def _log_unknown_density_ids(density, model_ids, density_file_name):
    """ Log an error for every density row whose ID is not in the list of model IDs. """
    for index, row in density.iterrows():
        if row['ID'] not in model_ids:
            # NOTE(review): index + 2 presumably converts the zero-based row
            # index to a line number in a file with a header line -- confirm.
            LOGGER.error(
                'Model ID "{0}" on line {1} of initial population density file "{2}" is not available '
                'in list of single species models'.format(
                    row['ID'], index + 2, density_file_name))
def optimize_pair_model(model_file_name, medium):
    """ Optimize a two species community model.

    This function is used as a target function in a multiprocessing pool.
    Since the model is read from a file each time the function runs there is
    no need to revert the model after the optimization.

    The effect of species B on the growth of species A is calculated with the
    equation "G_ta / G_a" where G_ta is the growth rate of species A in the
    presence of species B and G_a is the growth rate of species A in the
    absence of species B. The effect of species A on the growth of species B
    is calculated the same way. When the growth rate alone is zero,
    ALMOST_ZERO is used as the divisor. An infeasible solution is considered
    the same as no growth.

    Parameters
    ----------
    model_file_name : str
        Path to two species community model file
    medium : dict
        Dictionary with exchange reaction ID as key and bound as value

    Returns
    -------
    pandas.Series
        Growth rate details for interaction between two species in pair,
        indexed by pair_rate_columns
    """

    def interaction_effect(together, alone):
        """ Return together / alone, with ALMOST_ZERO standing in for a zero growth rate alone. """
        if together == 0.0 and alone == 0.0:
            return 0.0
        divisor = alone if alone != 0.0 else ALMOST_ZERO
        return together / divisor

    # Optimize the model with two species together, one species knocked out,
    # and other species knocked out.
    pair_model = load_model_from_file(model_file_name)
    apply_medium(pair_model, medium)
    species_a, species_b = pair_model.notes['species'][0], pair_model.notes['species'][1]
    a_id = species_a['id']
    a_objective = species_a['objective']
    b_id = species_b['id']
    b_objective = species_b['objective']

    t_solution = pair_model.optimize()
    a_solution = single_species_knockout(pair_model, b_id)
    b_solution = single_species_knockout(pair_model, a_id)

    # Round very small growth rates to zero.
    rate_sources = (
        (t_solution, a_objective),
        (t_solution, b_objective),
        (a_solution, a_objective),
        (b_solution, b_objective),
    )
    for solution, objective in rate_sources:
        if solution.fluxes[objective] < NO_GROWTH:
            solution.fluxes[objective] = 0.

    # Collect the growth rates, treating a non-optimal solution as no growth.
    if t_solution.status == 'optimal':
        a_together = t_solution.fluxes[a_objective]
        b_together = t_solution.fluxes[b_objective]
    else:
        a_together = 0.0
        b_together = 0.0
    a_alone = a_solution.fluxes[a_objective] if a_solution.status == 'optimal' else 0.0
    b_alone = b_solution.fluxes[b_objective] if b_solution.status == 'optimal' else 0.0

    # Evaluate the interaction between the two species.
    a_effect = interaction_effect(a_together, a_alone)
    b_effect = interaction_effect(b_together, b_alone)

    values = [a_id, b_id, a_together, a_alone, a_effect, b_together, b_alone, b_effect]
    return pd.Series(values, index=pair_rate_columns)