示例#1
0
def make_diet_from_models(model_file_names, diet_file_name, bound=None):
    """ Make a diet file from the exchange reactions in a list of models.

    Every reaction whose ID starts with 'EX_' is added to the diet once, the
    first time it is seen while walking the models in the given order. The
    resulting dictionary (exchange reaction ID -> bound) is written to
    diet_file_name as indented JSON.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files
    diet_file_name : str
        Path to file to store diet conditions in JSON format
    bound : float, optional
        Bound to set on every exchange reaction (the absolute value is used),
        when None use bound from first model that contains exchange reaction
    """
    def get_active_bound(reaction):
        """ For an active boundary reaction, return the relevant bound.

        Returns the negated lower bound when the reaction has reactants, the
        upper bound when it only has products, and None when it has neither
        (which is stored as null in the JSON file).
        """
        if reaction.reactants:
            return -reaction.lower_bound
        if reaction.products:
            return reaction.upper_bound
        return None

    if bound is not None:
        bound = float(abs(bound))

    diet = dict()
    for name in model_file_names:
        model = load_model_from_file(name)
        exchange_reactions = model.reactions.query(
            lambda x: x.startswith('EX_'), 'id')
        for rxn in exchange_reactions:
            # The first model that contains a reaction decides its bound.
            if rxn.id in diet:
                continue
            diet[rxn.id] = get_active_bound(rxn) if bound is None else bound

    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the anonymous file object is garbage collected.
    with open(diet_file_name, 'w') as handle:
        json.dump(diet, handle, indent=4)
    return
示例#2
0
def get_exchange_reaction_ids(model_file_names):
    """ Collect exchange reaction IDs and model IDs from a list of models.

    Each model file is loaded in turn. Reactions whose ID starts with 'EX_'
    are treated as exchange reactions.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files

    Returns
    -------
    set
        Unique exchange reaction IDs found across all input models
    list
        Model IDs, one per input file, in the same order as the input list
    """

    exchange_ids = set()
    ids_of_models = []
    for path in model_file_names:
        loaded = load_model_from_file(path)
        ids_of_models.append(loaded.id)
        matches = loaded.reactions.query(
            lambda rxn_id: rxn_id.startswith('EX_'), 'id')
        exchange_ids.update(match.id for match in matches)
    return exchange_ids, ids_of_models
示例#3
0
def check_for_growth(model_file_name):
    """ Optimize a model and report whether it grows under its own conditions.

    Parameters
    ----------
    model_file_name : str
        Path to input model file

    Returns
    -------
    dict
        Summary with key 'grows' (bool) and key 'message' (None when the
        model grows, otherwise a string explaining why it does not)
    """

    model = load_model_from_file(model_file_name)
    result = model.optimize()

    # The optimization itself failed.
    if result.status != 'optimal':
        return {
            'grows': False,
            'message': 'Model {0} in file {1} fails to optimize'.format(
                model.id, model_file_name),
        }

    # The optimization succeeded but the objective is effectively zero.
    if result.objective_value <= 0.0001:
        return {
            'grows': False,
            'message': 'Model {0} in file {1} does not produce growth under given conditions'
                       .format(model.id, model_file_name),
        }

    return {'grows': True, 'message': None}
示例#4
0
def optimize_for_species(model_file_name, species_id, medium,
                         time_point_folder):
    """ Knock out the other species from a two species model, optimize, and save results.

    The modified model is saved to '<model id>.json' and the solution fluxes
    to '<model id>-solution.json', both in time_point_folder.

    Parameters
    ----------
    model_file_name : str
        Path to two species community model file
    species_id : str
        ID of the species to keep, the other member of the pair is knocked out
    medium : dict
        Medium passed to apply_medium() before optimizing
    time_point_folder : str
        Path to folder where the model and the solution fluxes are saved

    Returns
    -------
    Solution
        Result returned by optimizing the community model with one species
        knocked out

    Raises
    ------
    Exception
        When species_id is not listed in the 'species' notes of the model
    """

    LOGGER.info('Loading model {0} to optimize for {1}'.format(
        model_file_name, species_id))
    pair_model = load_model_from_file(model_file_name)

    # Locate the requested species in the pair community model. The last
    # matching entry wins, there is deliberately no early exit.
    members = pair_model.notes['species']
    species_index = -1
    for position, member in enumerate(members):
        if member['id'] == species_id:
            species_index = position
    if species_index < 0:
        raise Exception(
            'Species {0} is not a member of the community'.format(species_id))

    # The model holds two species, so the one to knock out is the other slot.
    knockout_index = 1 if species_index == 0 else 0
    knockout_id = members[knockout_index]['id']
    LOGGER.info('Going to knock out {0} from index {1}'.format(
        knockout_id, knockout_index))

    with pair_model:
        # Apply the medium.
        apply_medium(pair_model, medium)

        # Knock out every reaction whose ID starts with the other species ID.
        for doomed in pair_model.reactions.query(
                lambda r: r.startswith(knockout_id), 'id'):
            doomed.knock_out()

        # Drop the knocked out species from the community model objective.
        knockout_objective = pair_model.reactions.get_by_id(
            members[knockout_index]['objective'])
        coefficients = linear_reaction_coefficients(pair_model)
        del coefficients[knockout_objective]
        set_objective(pair_model, coefficients)
        model_path = join(time_point_folder, pair_model.id + '.json')
        save_model_to_file(pair_model, model_path)

        # Optimize the community model with the specified species knocked out.
        solution = pair_model.optimize()
        fluxes_path = join(time_point_folder,
                           pair_model.id + '-solution.json')
        solution.fluxes.to_json(fluxes_path)

    return solution
示例#5
0
def set_model_id_prefix(model_file_names, prefix='M'):
    """ Prepend a prefix to the model ID of every model in a list of models.

    Model IDs must start with an alphabetic character so they are interpreted
    as strings in data frames. Models created by ModelSEED typically use a
    PATRIC genome ID as the model ID which is a number.

    Each model file is rewritten in place. The prefix is added on every call,
    regardless of whether the ID already starts with it.

    Parameters
    ----------
    model_file_names : list of str
        List of path names to model files
    prefix : str, optional
        String to use as prefix for model IDs
    """

    for path in model_file_names:
        renamed = load_model_from_file(path)
        renamed.id = prefix + renamed.id
        save_model_to_file(renamed, path)
示例#6
0
def optimize_single_model(model_file_name, medium):
    """ Optimize a single species model on a given medium.

    This function is used as a target function in a multiprocessing pool.

    The model is read from its file on every call rather than being kept in
    memory for the whole simulation. That lowers the memory requirements and
    removes the need to revert the model after the optimization, at the cost
    of more file system accesses.

    Parameters
    ----------
    model_file_name : str
        Path to the single species model file to be optimized
    medium : dict
        Dictionary with exchange reaction ID as key and bound as value

    Returns
    -------
    dict
        Dictionary with keys 'model_id', 'status', 'objective_value', and
        'exchange_fluxes'. The last one maps exchange reaction ID to flux for
        every exchange reaction with a non-zero flux and is empty unless the
        solution status is 'optimal'.
    """

    # Original author's note: growth rates were confirmed to match those from
    # running the species solo in a pair model. This step may only exist to
    # obtain the exchange reaction fluxes, which mminte output does not offer.

    model = load_model_from_file(model_file_name)
    details = {'model_id': model.id}
    apply_medium(model, medium)

    outcome = model.optimize()
    details['status'] = outcome.status
    details['objective_value'] = outcome.objective_value

    exchanges = model.reactions.query(lambda x: x.startswith('EX_'), 'id')
    if outcome.status == 'optimal':
        details['exchange_fluxes'] = {
            exchange.id: outcome.fluxes[exchange.id]
            for exchange in exchanges
            if outcome.fluxes[exchange.id] != 0.0
        }
    else:
        details['exchange_fluxes'] = dict()
    return details
示例#7
0
def run_simulation(time_interval,
                   single_file_names,
                   pair_file_names,
                   diet_file_name,
                   density_file_name,
                   data_folder,
                   time_step=0.5,
                   k=1,
                   n_processes=None):
    """ Run a simulation over a time interval.

    The initial diet (extended with the exchange reactions of the single
    species models) is saved to 'initial-diet.json' in data_folder. For every
    time point a 'timepoint-NNNN' folder is created in data_folder to hold the
    files generated at that time point. After the last time point the diet and
    population densities are saved to 'final-diet.json' and
    'final-density.csv' in data_folder.

    Parameters
    ----------
    time_interval : range
        Range of time points for running the simulation
    single_file_names : list of str
        List of path names to single species models
    pair_file_names : list of str
        List of path names to two species community model files
    diet_file_name : str
        Path to file with initial diet conditions in JSON format
    density_file_name : str
        Path to file with initial population densities in CSV format
    data_folder : str
        Path to folder for storing data generated at each time point
    time_step : float, optional
        Adjustment to time point where 1 is one hour, 0.5 is 30 minutes, etc.
    k : int, optional
        Maximum size of the population that the environment has the capacity to support
    n_processes: int, optional
        Number of processes in job pool, when None use the number of CPUs
        but no more than 4

    Raises
    ------
    ValueError
        When time_step is out of range, when the initial population density
        file is missing required columns or has invalid values, or when the
        model IDs in that file do not match the single species models
    """

    # Validate time_step parameter.
    if time_step <= 0.0 or time_step > 1.0:
        raise ValueError(
            'time_step parameter must be a value greater than 0 and less than or equal to 1'
        )

    # Get the initial population density values.
    density = pd.read_csv(density_file_name,
                          dtype={
                              'ID': str,
                              'DENSITY': float
                          })
    if not set(density_columns).issubset(density.columns):
        raise ValueError(
            'Required columns {0} not available in initial population density file "{1}"'
            .format(density_columns, density_file_name))
    invalid_fields = density.isnull().values.sum()
    if invalid_fields > 0:
        raise ValueError(
            'There are {0} fields with invalid values in initial population density file "{1}"'
            .format(invalid_fields, density_file_name))

    # Get the initial diet conditions.
    with open(diet_file_name) as handle:
        diet = json.load(handle)

    # Set diet for first time step by adding exchange reactions from the single
    # species models that are not in the initial diet. This allows metabolites
    # produced by a species to become available in the diet conditions during the
    # simulation.
    model_exchanges, model_ids = get_exchange_reaction_ids(single_file_names)
    initial_exchanges = set(diet.keys())
    if initial_exchanges > model_exchanges:
        warn(
            'Diet file "{0}" contains more exchange reactions than there are in single species models'
            .format(diet_file_name))
    # NOTE(review): missing exchange reactions are only added when every
    # reaction in the diet file is also in the models. A diet file with a
    # single unknown reaction skips this step entirely -- confirm intended.
    if model_exchanges.issuperset(
            initial_exchanges):  # @todo is this necessary?
        for rxn_id in (model_exchanges - initial_exchanges):
            diet[rxn_id] = 0.0
    with open(join(data_folder, 'initial-diet.json'), 'w') as handle:
        json.dump(diet, handle, indent=4)

    # Confirm the model IDs in the initial density file match the model IDs in the
    # list of single species models. Every mismatch is logged before raising so
    # all problems are reported at once.
    if density.shape[0] != len(model_ids):
        for index, row in density.iterrows():
            if row['ID'] not in model_ids:
                # index + 2 converts the zero-based row index to a line number
                # in the file (one header line, one-based numbering).
                LOGGER.error(
                    'Model ID "{0}" on line {1} of initial population density file "{2}" is not available '
                    'in list of single species models'.format(
                        row['ID'], index + 2, density_file_name))
        for model_id in model_ids:
            if model_id not in density.ID.values:
                LOGGER.error(
                    'Model ID "{0}" from list of single species models is not available in '
                    'initial population density file'.format(model_id))
        raise ValueError(
            'Number of species ({0}) in initial density file does not match '
            'number of single species models ({1})'.format(
                density.shape[0], len(model_ids)))
    if density.loc[density['ID'].isin(model_ids)].shape[0] != len(model_ids):
        for index, row in density.iterrows():
            if row['ID'] not in model_ids:
                LOGGER.error(
                    'Model ID "{0}" on line {1} of initial population density file "{2}" is not available '
                    'in list of single species models'.format(
                        row['ID'], index + 2, density_file_name))
        raise ValueError(
            'One or more model IDs in initial density file do not match '
            'model IDs in single species models')

    # Find all of the pair community models that each single species model is a member of.
    single_to_pairs = defaultdict(list)
    for model_file_name in pair_file_names:
        pair_model = load_model_from_file(model_file_name)
        single_to_pairs[pair_model.notes['species'][0]['id']].append(
            model_file_name)
        single_to_pairs[pair_model.notes['species'][1]['id']].append(
            model_file_name)

    # Create a job pool for running optimizations.
    if n_processes is None:
        n_processes = min(cpu_count(), 4)
    pool = Pool(n_processes)

    # Run the simulation over the specified time interval. The pool is closed
    # in a finally clause so it is released even when a time point fails.
    try:
        for time_point in time_interval:
            # Start this time point.
            time_point_id = '{0:04d}'.format(time_point + 1)
            LOGGER.info('[%s] STARTED TIME POINT', time_point_id)
            time_point_folder = join(data_folder,
                                     'timepoint-' + time_point_id)
            if not exists(time_point_folder):
                makedirs(time_point_folder)
            pair_rate_file_name = join(
                time_point_folder, 'pair-rates-{0}.csv'.format(time_point_id))
            effects_matrix_file_name = join(
                time_point_folder,
                'effects-matrix-{0}.csv'.format(time_point_id))
            # Use a separate name so the density_file_name parameter is not
            # overwritten by the per time point output file.
            step_density_file_name = join(
                time_point_folder, 'density-{0}.csv'.format(time_point_id))
            # Format only the file name, the folder path may contain braces.
            single_rate_file_name = join(
                time_point_folder,
                'single-rates-{0}.csv'.format(time_point_id))
            next_diet_file_name = join(
                time_point_folder, 'diet-{0}.json'.format(time_point_id))

            # Calculate the growth rates for each two species model under the current diet conditions.
            growth_rates, alone = calculate_growth_rates(
                pair_file_names, diet, pool, pair_rate_file_name,
                time_point_id, time_point_folder, single_to_pairs)

            # Create the effects matrix.
            effects_matrix = create_effects_matrix(growth_rates,
                                                   effects_matrix_file_name,
                                                   time_point_id)

            # Run Leslie-Gower algorithm to calculate new population densities.
            density = leslie_gower(effects_matrix, density,
                                   step_density_file_name, time_point_id,
                                   alone, k, time_step)

            # Get the exchange reaction fluxes from optimizing single species models.
            exchange_fluxes = get_exchange_fluxes(single_file_names, diet,
                                                  pool, single_rate_file_name,
                                                  time_point_id)

            # Create diet conditions for next time point.
            diet = create_next_diet(diet, exchange_fluxes, density,
                                    next_diet_file_name, time_step,
                                    time_point_id)
    finally:
        pool.close()

    # Store results from last time step in data folder.
    with open(join(data_folder, 'final-diet.json'), 'w') as handle:
        json.dump(diet, handle, indent=4)
    density.to_csv(join(data_folder, 'final-density.csv'))

    return
示例#8
0
def optimize_pair_model(model_file_name, medium):
    """ Optimize a two species community model.

    This function is used as a target function in a multiprocessing pool. The
    model is read from a file on every call, so there is no need to revert the
    model after the optimization.

    The effect of species B on the growth of species A is calculated as
    "G_ta / G_a" where G_ta is the growth rate of species A in the presence of
    species B and G_a is the growth rate of species A in the absence of
    species B. The effect of species A on the growth of species B is
    calculated the same way. When G_a is zero, ALMOST_ZERO is used as the
    divisor, and when both rates are zero the effect is zero.

    Note that an infeasible solution is considered the same as no growth.

    Parameters
    ----------
    model_file_name : str
        Path to two species community model file
    medium : dict
        Dictionary with exchange reaction ID as key and bound as value

    Returns
    -------
    pandas.Series
        Growth rate details for interaction between two species in pair,
        indexed by pair_rate_columns
    """

    def zero_out_tiny_rate(solution, objective_id):
        """ Replace a growth rate below NO_GROWTH with zero, in place. """
        if solution.fluxes[objective_id] < NO_GROWTH:
            solution.fluxes[objective_id] = 0.

    def growth_rate(solution, objective_id):
        """ Return the growth rate, or zero unless the solution is optimal. """
        if solution.status == 'optimal':
            return solution.fluxes[objective_id]
        return 0.0

    def partner_effect(together, alone):
        """ Return the ratio of growth together to growth alone. """
        if together == 0.0 and alone == 0.0:
            return 0.0
        divisor = alone if alone != 0.0 else ALMOST_ZERO
        return together / divisor

    pair_model = load_model_from_file(model_file_name)
    apply_medium(pair_model, medium)

    first, second = pair_model.notes['species'][0], pair_model.notes['species'][1]
    a_id, a_objective = first['id'], first['objective']
    b_id, b_objective = second['id'], second['objective']

    # Optimize with both species together, then with each species knocked out.
    t_solution = pair_model.optimize()
    a_solution = single_species_knockout(pair_model, b_id)
    b_solution = single_species_knockout(pair_model, a_id)

    # Round very small growth rates to zero.
    zero_out_tiny_rate(t_solution, a_objective)
    zero_out_tiny_rate(t_solution, b_objective)
    zero_out_tiny_rate(a_solution, a_objective)
    zero_out_tiny_rate(b_solution, b_objective)

    # Evaluate the interaction between the two species.
    a_together = growth_rate(t_solution, a_objective)
    b_together = growth_rate(t_solution, b_objective)

    a_alone = growth_rate(a_solution, a_objective)
    a_effect = partner_effect(a_together, a_alone)

    b_alone = growth_rate(b_solution, b_objective)
    b_effect = partner_effect(b_together, b_alone)

    values = [
        a_id, b_id, a_together, a_alone, a_effect, b_together, b_alone,
        b_effect
    ]
    return pd.Series(values, index=pair_rate_columns)