Пример #1
0
def define_environment(medium, media_db, community, mode, aerobic, verbose,
                       min_mol_weight, use_lp):
    """Choose the environment in which a community is simulated.

    Three cases are handled, in this order of precedence:

    1. ``medium`` is given: the environment is built from the compounds
       stored under that key in ``media_db``.
    2. ``mode == "global"``: a complete environment is built for the merged
       community model, optionally overriding the oxygen exchange bounds.
    3. otherwise: a minimal environment is computed with
       ``minimal_environment``.

    Args:
        medium: key of the medium in ``media_db`` (falsy to skip case 1)
        media_db: mapping from medium id to its compounds
        community: community model; ``organisms`` and ``merged`` are used
        mode (str): ``"global"`` selects the complete environment
        aerobic: None leaves oxygen untouched, truthy allows oxygen uptake,
            falsy blocks it (complete and minimal environments only)
        verbose, min_mol_weight, use_lp: forwarded to
            ``minimal_environment`` (case 3 only)

    Returns:
        tuple: ``(medium_id, env)`` where ``medium_id`` is the given medium
        key, ``'complete'`` or ``"minimal"``. ``env`` is whatever the chosen
        constructor returned.
    """
    oxygen_exchange = "R_EX_M_o2_e_pool"

    # The uptake limit grows with the number of organisms in the community.
    uptake_limit = 10.0 * len(community.organisms)

    if medium:
        def to_pool_exchange(compound):
            return "R_EX_M_{}_e_pool".format(compound)

        environment = Environment.from_compounds(media_db[medium],
                                                 fmt_func=to_pool_exchange,
                                                 max_uptake=uptake_limit)
        return medium, environment

    if mode == "global":
        environment = Environment.complete(community.merged,
                                           max_uptake=uptake_limit)

        # Only touch the oxygen exchange when the caller expressed a choice.
        if aerobic is not None:
            oxygen_lower_bound = -uptake_limit if aerobic else 0
            environment[oxygen_exchange] = (oxygen_lower_bound, inf)

        return 'complete', environment

    environment = minimal_environment(community,
                                      aerobic,
                                      verbose=verbose,
                                      min_mol_weight=min_mol_weight,
                                      use_lp=use_lp,
                                      max_uptake=uptake_limit)
    return "minimal", environment
Пример #2
0
def maincall(inputfiles,
             flavor=None,
             init=None,
             mediadb=None,
             outputfile=None):
    """Merge several single-organism models into one community model file.

    Args:
        inputfiles: iterable of model file paths to load
        flavor: SBML flavor used for loading and saving; when falsy the
            ``default_flavor`` entry of the ``sbml`` config section is used
        init: optional medium id; when given, the corresponding environment
            is applied to the merged model before saving
        mediadb: path to the media library; when falsy the ``media_library``
            entry of the ``input`` config section (relative to
            ``project_dir``) is used
        outputfile: destination path; defaults to ``community.xml`` with the
            model id ``community``

    Raises:
        IOError: if the media library cannot be loaded
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        outputfile = 'community.xml'
        model_id = 'community'
    else:
        # The model id is the output file name without directory/extension.
        model_id = os.path.splitext(os.path.basename(outputfile))[0]

    members = []
    for path in inputfiles:
        members.append(load_cbmodel(path, flavor=flavor))

    merged = Community(model_id, members).merged_model

    if init:
        mediadb = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            library = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        Environment.from_compounds(library[init]).apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
Пример #3
0
def benchmark_biolog(model, medium, data, min_growth=0.1, max_uptake=10):
    """Compare simulated growth on single compounds against observed data.

    For every compound in ``data`` the matching exchange reaction
    (``R_EX_<bigg_id>_e``) is temporarily opened for uptake on top of the
    base medium, an FBA is solved, and the prediction is classified against
    the observed growth.

    Args:
        model: constraint-based model to simulate
        medium: compounds of the base medium
        data: table with ``bigg_id`` and ``growth`` columns (accessed through
            ``dropna`` / ``iterrows``, so presumably a pandas DataFrame);
            growth values ``'++'`` and ``'+'`` count as observed growth
        min_growth (float): objective value above which the simulation
            counts as growth (default: 0.1)
        max_uptake (float): uptake bound applied to the tested compound
            (default: 10)

    Returns:
        dict: compound id -> ``'TP'``, ``'FP'``, ``'FN'`` or ``'TN'``.
        Compounds without an exchange reaction in the model are treated as
        "no in-silico growth".
    """
    env = Environment.from_compounds(medium)
    constraints = env.apply(model, inplace=False, warning=False)
    solver = solver_instance(model)
    data = data[["bigg_id", "growth"]].dropna()
    result = {}

    for _, row in data.iterrows():
        met = row["bigg_id"]
        in_vivo_growth = row["growth"] in {'++', '+'}
        r_id = f"R_EX_{met}_e"

        if r_id in model.reactions:
            # Remember the base-medium bound so it can be restored after the
            # test; an exchange missing from the constraints is closed.
            tmp = constraints[r_id] if r_id in constraints else (0, 0)
            constraints[r_id] = (-max_uptake, 0)
            sol = FBA(model, constraints=constraints, solver=solver)
            # The solver status must be checked explicitly: a bare
            # ``Status.OPTIMAL`` is just a truthy constant, so a failed solve
            # would go on to compare its objective value regardless.
            in_silico_growth = (sol.status == Status.OPTIMAL
                                and sol.fobj > min_growth)
            constraints[r_id] = tmp
        else:
            in_silico_growth = False

        if in_silico_growth:
            result[met] = 'TP' if in_vivo_growth else 'FP'
        else:
            result[met] = 'FN' if in_vivo_growth else 'TN'

    return result
Пример #4
0
def benchmark_essentiality(model, medium, in_vivo):
    """Classify predicted gene essentiality against experimental data.

    Args:
        model: constraint-based model to simulate
        medium: compounds of the growth medium, or None to use the complete
            environment of the model
        in_vivo (dict): gene id -> observed essentiality (truthy when the
            gene is essential)

    Returns:
        dict: gene id -> ``'TP'``, ``'FN'``, ``'FP'`` or ``'TN'``, where
        "positive" means essential.
    """
    if medium is None:
        env = Environment.complete(model)
    else:
        env = Environment.from_compounds(medium)

    bounds = env.apply(model, inplace=False, warning=False)
    predicted = essential_genes(model, constraints=bounds, min_growth=0.1)

    # (observed essential, predicted essential) -> confusion-matrix label
    labels = {
        (True, True): 'TP',
        (True, False): 'FN',
        (False, True): 'FP',
        (False, False): 'TN',
    }

    return {
        gene: labels[(bool(observed), gene in predicted)]
        for gene, observed in in_vivo.items()
    }
Пример #5
0
def minimal_environment(community,
                        aerobic=None,
                        min_mol_weight=False,
                        min_growth=0.1,
                        max_uptake=10,
                        validate=False,
                        verbose=True,
                        use_lp=False):
    """Compute a minimal growth environment for a community.

    Water is always freely exchangeable and is never part of the
    minimisation. When ``aerobic`` is not None, oxygen is likewise taken out
    of the minimisation and its exchange bounds are fixed up front.

    Note that the flux bounds of the water (and possibly oxygen) exchange
    reactions are set directly on ``community.merged``.

    Args:
        community: community model; ``merged`` is used
        aerobic: None to let oxygen be minimised like any other compound,
            truthy to allow oxygen uptake, falsy to block it
        min_mol_weight (bool): passed to ``minimal_medium`` as
            ``min_mass_weight`` (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        validate (bool): passed to ``minimal_medium`` (default: False)
        verbose (bool): emit a warning when no medium is found
            (default: True)
        use_lp (bool): when True, ``minimal_medium`` is called with
            ``milp=False`` (default: False)

    Returns:
        The environment built from the minimal medium, or None when
        ``minimal_medium`` found no medium.
    """
    water_exchange = "R_EX_M_h2o_e_pool"
    oxygen_exchange = "R_EX_M_o2_e_pool"

    candidates = set(community.merged.get_exchange_reactions())

    candidates.discard(water_exchange)
    community.merged.set_flux_bounds(water_exchange, -inf, inf)

    if aerobic is not None:
        candidates.discard(oxygen_exchange)
        oxygen_lower_bound = -max_uptake if aerobic else 0
        community.merged.set_flux_bounds(oxygen_exchange, oxygen_lower_bound,
                                         inf)

    ex_rxns, _ = minimal_medium(community.merged,
                                exchange_reactions=candidates,
                                min_mass_weight=min_mol_weight,
                                min_growth=min_growth,
                                milp=(not use_lp),
                                max_uptake=max_uptake,
                                validate=validate,
                                warnings=False)

    if ex_rxns is None:
        if verbose:
            warn('Failed to find a medium for interacting community.')
        return None

    # Oxygen was excluded from the search, so add it back for aerobic runs.
    if aerobic:
        ex_rxns |= {oxygen_exchange}

    environment = Environment.from_reactions(ex_rxns, max_uptake=max_uptake)
    environment[water_exchange] = (-inf, inf)
    return environment
Пример #6
0
def run_abiotic(comm_id, sense, community, medium_id, excluded_mets, env,
                verbose, min_mol_weight, other_mets, n, p, ignore_coupling):
    """Run a detailed analysis on abiotic perturbations of a medium.

    The unperturbed medium is analysed first, followed by ``n`` perturbed
    media obtained by adding compounds to (``sense == 'add'``) or removing
    compounds from (``sense == 'rm'``) the medium.

    In ``'rm'`` mode the medium is first extended with ``2 * p`` randomly
    chosen candidate compounds, and the perturbations remove compounds from
    that extension.

    Args:
        comm_id: community identifier, forwarded to ``run_detailed``
        sense (str): ``'add'`` or ``'rm'``
        community: community model; ``organisms`` is used to scale the
            uptake limit
        medium_id: identifier of the base medium; perturbed media are named
            ``<medium_id>_<compound>`` or ``<medium_id>_<index>``
        excluded_mets (set): compounds never used for perturbations
        env: base environment; its compounds define the medium
        verbose (bool): print a short progress message
        min_mol_weight: forwarded to ``run_detailed``
        other_mets (set): pool of compounds available for perturbations
        n (int): number of random perturbations; 0 runs one systematic
            perturbation per candidate compound instead
        p (int): number of compounds changed per random perturbation
        ignore_coupling: forwarded to ``run_detailed``

    Returns:
        The entries returned by ``run_detailed`` for the base medium,
        extended with the entries of every perturbed medium.

    Raises:
        ValueError: if ``sense`` is neither ``'add'`` nor ``'rm'``
        RuntimeError: if there are too few candidate compounds
    """
    # Validate up front: an unknown sense would otherwise only surface later
    # as an unbound local variable.
    if sense not in ('add', 'rm'):
        raise ValueError(
            "Invalid perturbation sense: {!r} (expected 'add' or 'rm').".
            format(sense))

    def pool_exchange_id(compound):
        return f"R_EX_M_{compound}_e_pool"

    # Strip the "R_EX_M_" prefix and "_e_pool" suffix from reaction ids.
    medium = set(env.get_compounds(fmt_func=lambda x: x[7:-7]))
    max_uptake = 10.0 * len(community.organisms)

    modified = sorted(other_mets - (medium | excluded_mets))

    if sense == 'rm':
        n_extra_cpds = 2 * p
        # Check before sampling: sample() itself fails on a short pool, and
        # the generic check below can never fire once 2 * p were drawn.
        if len(modified) < n_extra_cpds:
            raise RuntimeError(
                "Insufficient compounds ({}) to extend the medium with ({}) "
                "removable compounds.".format(len(modified), n_extra_cpds))
        modified = sample(modified, n_extra_cpds)
        medium = medium | set(modified)
        env = Environment.from_compounds(medium,
                                         fmt_func=pool_exchange_id,
                                         max_uptake=max_uptake)

    if len(modified) < p:
        raise RuntimeError(
            "Insufficient compounds ({}) to perform ({}) perturbations.".
            format(len(modified), p))

    if n == 0:
        do_all = True
        n = len(modified)
        if verbose:
            print(
                'Running {} systematic abiotic perturbations with 1 compound...'
                .format(n))
    else:
        do_all = False
        if verbose:
            print(
                'Running {} random abiotic perturbations with {} compounds...'.
                format(n, p))

    data = run_detailed(comm_id, community, medium_id, excluded_mets, env,
                        False, min_mol_weight, ignore_coupling)

    for i in range(n):
        if do_all:
            changed = [modified[i]]
            new_id = "{}_{}".format(medium_id, modified[i])
        else:
            changed = sample(modified, p)
            new_id = "{}_{}".format(medium_id, i + 1)

        if sense == 'add':
            new_compounds = list(medium) + changed
        else:
            new_compounds = medium - set(changed)

        new_env = Environment.from_compounds(new_compounds,
                                             fmt_func=pool_exchange_id,
                                             max_uptake=max_uptake)
        entries = run_detailed(comm_id, community, new_id, excluded_mets,
                               new_env, False, min_mol_weight, ignore_coupling)
        data.extend(entries)

    return data
Пример #7
0
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False):
    """Reconstruct a model (or an ensemble of models) from a genome.

    The input is annotated (running diamond when needed), reactions of the
    universe model are scored from the annotations, and the model is carved
    from the universe. A single model can optionally be gap-filled for a
    list of media before it is saved. Most failures are reported with
    ``print`` followed by an early return.

    Args:
        inputfile: input file path, or the genome identifier to download
            when ``input_type`` is ``'refseq'``
        input_type (str): ``'protein'``, ``'dna'``, ``'refseq'``,
            ``'eggnog'`` or ``'diamond'``
        outputfile: output path; in recursive mode it is treated as a
            folder. Defaults to the input path with an ``.xml`` extension.
        diamond_args: extra arguments forwarded to ``run_blast``
        universe: name used to select ``universe_<name>.xml.gz``
        universe_file: explicit universe model path (overrides ``universe``)
        ensemble_size: when greater than 1 an ensemble is built instead of a
            single model
        verbose (bool): print progress messages
        debug (bool): when True the model id is passed as ``debug_output``
        flavor: SBML flavor for saving; defaults to the configured one
        gapfill: comma-separated medium ids to gap-fill for (single model
            only; the ensemble branch does not use it)
        blind_gapfill (bool): gap-fill without reaction scores
        init: medium id used to build the initial environment
        mediadb: media library path; defaults to the configured one
        default_score, uptake_score, soft_score, ref_score: forwarded to
            ``carve_model``
        soft: path to a soft-constraints file
        hard: path to a hard-constraints file
        reference: path to a reference model
        recursive_mode (bool): derive the model id from the input file name

    Raises:
        IOError: if a constraints file, the universe model, the reference
            model or the media library cannot be loaded
        ValueError: if ``input_type`` is not recognised
    """
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            # Narrowed from a bare except so that e.g. KeyboardInterrupt is
            # no longer swallowed here.
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # From here on the downloaded file is handled like a local genome.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type in ('protein', 'dna'):
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError as err:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(
            f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}') from err

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loading failure is reported as IOError, keeping the
            # original cause attached.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is not None and ensemble_size > 1:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble is complete at this point. Falling through would
        # reach the single-model code below, where `model` is never bound.
        if verbose:
            print('Done.')
        return

    if verbose:
        print('Reconstructing a single model')

    model = carve_model(universe_model, scores, inplace=(not gapfill), default_score=default_score,
                        uptake_score=uptake_score, soft_score=soft_score, soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints, ref_model=ref_model, ref_score=ref_score,
                        init_env=init_env, debug_output=debug_output)

    # Check for failure before touching the model in any way.
    if model is None:
        print("Failed to build model.")
        return

    annotate_genes(model, gene2gene, gene_annotations)

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)

    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:  # Initializes environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
Пример #8
0
def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbose=False):
    """Build an environment from a list of compounds and apply it to a model.

    Args:
        model: model the environment is applied to
        compounds: compounds that make up the medium
        max_uptake (float): maximum uptake rate (default: 10)
        inplace (bool): forwarded to ``Environment.apply`` (default: False)
        verbose (bool): forwarded to ``Environment.apply`` as ``warning``
            (default: False)

    Returns:
        Whatever ``Environment.apply`` returns for the given ``inplace``.
    """
    return Environment.from_compounds(compounds, max_uptake=max_uptake).apply(
        model, inplace=inplace, warning=verbose)
Пример #9
0
def mro_score(community,
              environment=None,
              direction=-1,
              min_mol_weight=False,
              min_growth=0.1,
              max_uptake=10,
              validate=False,
              verbose=True,
              use_lp=False,
              exclude=None):
    """
    Implements the metabolic resource overlap (MRO) score as defined in (Zelezniak et al, 2015).

    The score is the mean size of the pairwise overlaps between the
    organisms' minimal media divided by the mean size of those media.

    Note that the merged community model is modified: environments are
    applied in place and its biomass reaction is reassigned per organism.

    Args:
        community (Community): microbial community model
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mol_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate per organism (default: 10)
        validate (bool): passed to ``minimal_medium`` (default: False)
        verbose (bool): warn when no community medium is found (default: True);
            the per-organism failure warning is always emitted
        use_lp (bool): when True, ``minimal_medium`` is called with ``milp=False`` (default: False)
        exclude (set): compounds removed from the reported media (default: None)

    Returns:
        tuple: ``(score, extras)``. ``score`` is None when the mean medium
        size is zero. ``extras`` holds ``community_medium`` and
        ``individual_media``. ``(None, None)`` is returned when a minimal
        medium could not be found.
    """

    candidate_exchanges = set(community.merged.get_exchange_reactions())
    max_uptake = max_uptake * len(community.organisms)

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)
        candidate_exchanges.intersection_update(environment)

    # Options shared by every minimal-medium search below.
    search_options = dict(direction=direction,
                          min_mass_weight=min_mol_weight,
                          min_growth=min_growth,
                          max_uptake=max_uptake,
                          validate=validate,
                          warnings=False,
                          milp=(not use_lp))

    medium, sol = minimal_medium(community.merged,
                                 exchange_reactions=candidate_exchanges,
                                 **search_options)

    if sol.status != Status.OPTIMAL:
        if verbose:
            warn('MRO: Failed to find a valid solution for community')
        return None, None

    # Restrict the community to its minimal medium for the per-organism runs.
    Environment.from_reactions(medium, max_uptake=max_uptake).apply(
        community.merged, inplace=True)

    if exclude is None:
        exclude = set()

    # Reaction ids look like "R_EX_M_<compound>_e_pool".
    medium = {rxn_id[7:-7] for rxn_id in medium} - exclude
    individual_media = {}
    solver = solver_instance(community.merged)

    for org_id in community.organisms:
        community.merged.biomass_reaction = \
            community.organisms_biomass_reactions[org_id]
        org_exchanges = community.organisms_exchange_reactions[org_id]

        org_medium, sol = minimal_medium(community.merged,
                                         exchange_reactions=org_exchanges,
                                         solver=solver,
                                         **search_options)

        if sol.status != Status.OPTIMAL:
            warn('MRO: Failed to find a valid solution for: ' + org_id)
            return None, None

        compounds = {
            org_exchanges[rxn_id].original_metabolite[2:-2]
            for rxn_id in org_medium
        }
        individual_media[org_id] = compounds - exclude

    pairwise = {}
    for first, second in combinations(community.organisms, 2):
        pairwise[(first, second)] = \
            individual_media[first] & individual_media[second]

    if len(pairwise) != 0:
        mean_overlap = sum(len(shared)
                           for shared in pairwise.values()) / len(pairwise)
    else:
        mean_overlap = 0

    if len(individual_media) != 0:
        mean_medium_size = sum(
            len(cpds)
            for cpds in individual_media.values()) / len(individual_media)
    else:
        mean_medium_size = 0

    if mean_medium_size != 0:
        score = mean_overlap / mean_medium_size
    else:
        score = None

    extras = {'community_medium': medium, 'individual_media': individual_media}

    return score, extras
Пример #10
0
def mip_score(community,
              environment=None,
              min_mol_weight=False,
              min_growth=0.1,
              direction=-1,
              max_uptake=10,
              validate=False,
              verbose=True,
              use_lp=False,
              exclude=None):
    """
    Implements the metabolic interaction potential (MIP) score as defined in (Zelezniak et al, 2015).

    The score is the number of compounds in the minimal medium of the
    non-interacting community minus the number in the minimal medium of the
    interacting community.

    Note that the environment derived from the non-interacting medium is
    applied in place to the merged community model.

    Args:
        community (Community): microbial community model
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        min_mol_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        max_uptake (float): maximum uptake rate per organism (default: 10)
        validate (bool): validate solution using FBA (for debugging purposes, default: False)
        verbose (bool): warn when no medium is found (default: True)
        use_lp (bool): when True, ``minimal_medium`` is called with ``milp=False`` (default: False)
        exclude: compounds ignored when counting and reporting the media (default: None)

    Returns:
        tuple: ``(score, extras)`` where ``extras`` holds the compound lists
        ``noninteracting_medium`` and ``interacting_medium``. ``(None, None)``
        is returned when a minimal medium could not be found.
    """

    isolated = community.copy(copy_models=False, interacting=False)
    candidate_exchanges = set(community.merged.get_exchange_reactions())
    max_uptake = max_uptake * len(community.organisms)

    if environment:
        environment.apply(isolated.merged, inplace=True, warning=False)
        candidate_exchanges.intersection_update(environment)

    # Options shared by both minimal-medium searches.
    search_options = dict(direction=direction,
                          min_mass_weight=min_mol_weight,
                          min_growth=min_growth,
                          max_uptake=max_uptake,
                          validate=validate,
                          warnings=False,
                          milp=(not use_lp))

    noninteracting_medium, _ = minimal_medium(
        isolated.merged,
        exchange_reactions=candidate_exchanges,
        **search_options)

    if noninteracting_medium is None:
        if verbose:
            warn(
                'MIP: Failed to find a valid solution for non-interacting community'
            )
        return None, None

    # The abiotic environment of the interacting community is limited to the
    # minimal medium of the non-interacting community.
    Environment.from_reactions(noninteracting_medium,
                               max_uptake=max_uptake).apply(community.merged,
                                                            inplace=True)

    interacting_medium, _ = minimal_medium(
        community.merged,
        exchange_reactions=noninteracting_medium,
        **search_options)

    if interacting_medium is None:
        if verbose:
            warn(
                'MIP: Failed to find a valid solution for interacting community'
            )
        return None, None

    if exclude is not None:
        ignored = {'R_EX_M_{}_e_pool'.format(cpd) for cpd in exclude}
        interacting_medium = set(interacting_medium) - ignored
        noninteracting_medium = set(noninteracting_medium) - ignored

    score = len(noninteracting_medium) - len(interacting_medium)

    # Report compound ids: strip the "R_EX_M_" prefix and "_e_pool" suffix.
    extras = {
        'noninteracting_medium': [rxn[7:-7] for rxn in noninteracting_medium],
        'interacting_medium': [rxn[7:-7] for rxn in interacting_medium]
    }

    return score, extras