def define_environment(medium, media_db, community, mode, aerobic, verbose, min_mol_weight, use_lp):
    """Pick the environment in which a community will be simulated.

    Three cases are handled, in order of precedence:

    1. ``medium`` is given: the environment is built from the compounds
       stored under that key in ``media_db``.
    2. ``mode == "global"``: a complete environment is built for the merged
       community model, optionally overriding the oxygen exchange bounds.
    3. otherwise: a minimal environment is computed with
       ``minimal_environment``.

    Args:
        medium: key into ``media_db`` (falsy to skip case 1)
        media_db: mapping from medium id to its compounds
        community: community object exposing ``organisms`` and ``merged``
        mode (str): ``"global"`` selects the complete environment
        aerobic: ``None`` leaves oxygen untouched; truthy allows oxygen
            uptake, falsy blocks it
        verbose (bool): forwarded to ``minimal_environment``
        min_mol_weight (bool): forwarded to ``minimal_environment``
        use_lp (bool): forwarded to ``minimal_environment``

    Returns:
        tuple: ``(medium_id, env)`` where ``medium_id`` is the given medium,
        ``'complete'`` or ``"minimal"``.
    """
    # The uptake limit grows with the number of organisms in the community.
    uptake_limit = 10.0 * len(community.organisms)

    if medium:
        env = Environment.from_compounds(
            media_db[medium],
            fmt_func=lambda cpd: "R_EX_M_{}_e_pool".format(cpd),
            max_uptake=uptake_limit,
        )
        return medium, env

    if mode == "global":
        env = Environment.complete(community.merged, max_uptake=uptake_limit)
        if aerobic is not None:
            # Aerobic: allow uptake up to the limit; anaerobic: secretion only.
            oxygen_lower = -uptake_limit if aerobic else 0
            env["R_EX_M_o2_e_pool"] = (oxygen_lower, inf)
        return 'complete', env

    env = minimal_environment(
        community,
        aerobic,
        verbose=verbose,
        min_mol_weight=min_mol_weight,
        use_lp=use_lp,
        max_uptake=uptake_limit,
    )
    return "minimal", env
def maincall(inputfiles, flavor=None, init=None, mediadb=None, outputfile=None):
    """Merge several single-organism models into one community model file.

    Args:
        inputfiles: iterable of model file paths to load
        flavor (str): SBML flavor used for loading and saving; when falsy the
            ``default_flavor`` entry of the ``sbml`` config section is used
        init (str): optional medium id; when given, the corresponding
            environment is applied to the merged model before saving
        mediadb (str): path to the media library (defaults to the configured
            ``media_library``); only read when ``init`` is given
        outputfile (str): output path; defaults to ``community.xml``. The
            model id is the output file name without its extension.

    Raises:
        IOError: if the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        file_name = os.path.basename(outputfile)
        model_id = os.path.splitext(file_name)[0]

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    merged = Community(model_id, models).merged_model

    if init:
        library_path = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            media_db = load_media_db(library_path)
        except IOError:
            raise IOError('Failed to load media library:' + library_path)

        Environment.from_compounds(media_db[init]).apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def benchmark_biolog(model, medium, data, min_growth=0.1, max_uptake=10):
    """Compare simulated growth on single nutrient sources with observed data.

    For every row of ``data`` the exchange reaction ``R_EX_<bigg_id>_e`` is
    temporarily opened for uptake on top of the base medium, FBA is run, and
    the prediction is classified against the observed growth.

    Args:
        model: model to simulate
        medium: compounds of the base medium
        data: table with ``bigg_id`` and ``growth`` columns (indexed and
            cleaned like a pandas DataFrame); rows with missing values in
            either column are dropped. Growth values ``'++'`` and ``'+'``
            count as observed growth.
        min_growth (float): objective value above which the model is
            considered to grow (default: 0.1)
        max_uptake (float): uptake rate allowed for the tested compound
            (default: 10)

    Returns:
        dict: compound id -> ``'TP'``, ``'FP'``, ``'FN'`` or ``'TN'``
    """
    env = Environment.from_compounds(medium)
    constraints = env.apply(model, inplace=False, warning=False)
    solver = solver_instance(model)

    data = data[["bigg_id", "growth"]].dropna()
    observed_growth_labels = {'++', '+'}
    result = {}

    for met, growth in zip(data["bigg_id"], data["growth"]):
        in_vivo_growth = growth in observed_growth_labels
        r_id = f"R_EX_{met}_e"

        if r_id in model.reactions:
            # Remember the previous bounds so the shared constraint set can
            # be reused for the next compound. Compounds without an entry
            # are reset to (0, 0), as in the original implementation.
            previous = constraints[r_id] if r_id in constraints else (0, 0)
            constraints[r_id] = (-max_uptake, 0)
            sol = FBA(model, constraints=constraints, solver=solver)
            # The status must be checked explicitly: the enum member on its
            # own is always truthy, and a failed solve may not carry a
            # usable objective value.
            in_silico_growth = sol.status == Status.OPTIMAL and sol.fobj > min_growth
            constraints[r_id] = previous
        else:
            # Without an exchange reaction the compound cannot be consumed.
            in_silico_growth = False

        if in_silico_growth:
            result[met] = 'TP' if in_vivo_growth else 'FP'
        else:
            result[met] = 'FN' if in_vivo_growth else 'TN'

    return result
def benchmark_essentiality(model, medium, in_vivo):
    """Classify predicted gene essentiality against experimental data.

    Args:
        model: model to simulate
        medium: compounds of the growth medium, or ``None`` to use the
            complete environment of the model
        in_vivo (dict): gene id -> observed essentiality (truthy if essential)

    Returns:
        dict: gene id -> ``'TP'``, ``'FN'``, ``'FP'`` or ``'TN'``
    """
    env = Environment.complete(model) if medium is None else Environment.from_compounds(medium)
    constraints = env.apply(model, inplace=False, warning=False)
    predicted = essential_genes(model, constraints=constraints, min_growth=0.1)

    # Keyed by (essential in vivo, essential in silico).
    labels = {
        (True, True): 'TP',
        (True, False): 'FN',
        (False, True): 'FP',
        (False, False): 'TN',
    }

    return {
        gene: labels[(bool(is_essential), bool(gene in predicted))]
        for gene, is_essential in in_vivo.items()
    }
def minimal_environment(community, aerobic=None, min_mol_weight=False, min_growth=0.1, max_uptake=10,
                        validate=False, verbose=True, use_lp=False):
    """Compute a minimal growth environment for a community.

    Water is always freely exchangeable and is left out of the minimization.
    When ``aerobic`` is not ``None`` the oxygen exchange is also fixed
    beforehand (uptake allowed if truthy, blocked otherwise) and left out of
    the minimization. The flux bounds of these reactions are changed on
    ``community.merged`` itself.

    Args:
        community: community object exposing ``merged``
        aerobic: ``None`` to let the minimization decide on oxygen
        min_mol_weight (bool): passed to ``minimal_medium`` as ``min_mass_weight``
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        validate (bool): passed to ``minimal_medium``
        verbose (bool): warn when no medium is found (default: True)
        use_lp (bool): when true, ``minimal_medium`` is called with ``milp=False``

    Returns:
        Environment: the minimal environment, or ``None`` if none was found
    """
    water_rxn = "R_EX_M_h2o_e_pool"
    oxygen_rxn = "R_EX_M_o2_e_pool"
    oxygen_is_fixed = aerobic is not None

    candidates = set(community.merged.get_exchange_reactions())
    candidates.discard(water_rxn)
    community.merged.set_flux_bounds(water_rxn, -inf, inf)

    if oxygen_is_fixed:
        candidates.discard(oxygen_rxn)
        oxygen_lower = -max_uptake if aerobic else 0
        community.merged.set_flux_bounds(oxygen_rxn, oxygen_lower, inf)

    ex_rxns, sol = minimal_medium(
        community.merged,
        exchange_reactions=candidates,
        min_mass_weight=min_mol_weight,
        min_growth=min_growth,
        milp=(not use_lp),
        max_uptake=max_uptake,
        validate=validate,
        warnings=False,
    )

    if ex_rxns is None:
        if verbose:
            warn('Failed to find a medium for interacting community.')
        return None

    # Oxygen was excluded from the search, so add it back for aerobic runs.
    if oxygen_is_fixed and aerobic:
        ex_rxns |= {oxygen_rxn}

    env = Environment.from_reactions(ex_rxns, max_uptake=max_uptake)
    env[water_rxn] = (-inf, inf)
    return env
def run_abiotic(comm_id, sense, community, medium_id, excluded_mets, env, verbose, min_mol_weight, other_mets, n, p,
                ignore_coupling):
    """Run abiotic perturbations by adding or removing medium compounds.

    The candidate compounds are those in ``other_mets`` that are neither in
    the current medium nor in ``excluded_mets``.

    * ``sense == 'add'``: each perturbation adds candidate compounds to the
      medium.
    * ``sense == 'rm'``: ``2 * p`` candidates are first sampled and added to
      the medium (and ``env`` is rebuilt from it); each perturbation then
      removes some of those compounds again.

    An unperturbed run is always performed first.

    Args:
        comm_id: community identifier, forwarded to ``run_detailed``
        sense (str): ``'add'`` or ``'rm'``
        community: community object exposing ``organisms``
        medium_id: identifier of the base medium, used as prefix for the
            identifiers of the perturbed media
        excluded_mets (set): compounds that must not be perturbed
        env: base environment; must provide ``get_compounds``
        verbose (bool): print a progress message
        min_mol_weight (bool): forwarded to ``run_detailed``
        other_mets (set): pool of compounds available for perturbation
        n (int): number of random perturbations; ``0`` runs one systematic
            perturbation per candidate compound instead
        p (int): number of compounds changed in each random perturbation
        ignore_coupling: forwarded to ``run_detailed``

    Returns:
        list: entries of the unperturbed run followed by those of every
        perturbation.

    Raises:
        ValueError: if ``sense`` is neither ``'add'`` nor ``'rm'``.
        RuntimeError: if there are not enough candidate compounds.
    """
    if sense not in ('add', 'rm'):
        # Previously an unknown value surfaced later as an unbound-variable error.
        raise ValueError("Invalid perturbation sense: {!r} (expected 'add' or 'rm').".format(sense))

    def to_exchange_id(cpd):
        return f"R_EX_M_{cpd}_e_pool"

    # Strips the 'R_EX_M_' prefix and '_e_pool' suffix from exchange ids.
    medium = set(env.get_compounds(fmt_func=lambda x: x[7:-7]))
    max_uptake = 10.0 * len(community.organisms)

    modified = sorted(other_mets - (medium | excluded_mets))

    if sense == 'rm':
        n_extra_cpds = 2 * p
        # Check before sampling: sample() raises ValueError when asked for
        # more items than are available, which would bypass the intended
        # RuntimeError below.
        if len(modified) < n_extra_cpds:
            raise RuntimeError(
                "Insufficient compounds ({}) to sample the ({}) extra compounds required for ({}) perturbations."
                .format(len(modified), n_extra_cpds, p))
        modified = sample(modified, n_extra_cpds)
        medium = medium | set(modified)
        env = Environment.from_compounds(medium, fmt_func=to_exchange_id, max_uptake=max_uptake)

    if len(modified) < p:
        raise RuntimeError(
            "Insufficient compounds ({}) to perform ({}) perturbations.".format(len(modified), p))

    do_all = n == 0
    if do_all:
        n = len(modified)
        if verbose:
            print('Running {} systematic abiotic perturbations with 1 compound...'.format(n))
    elif verbose:
        print('Running {} random abiotic perturbations with {} compounds...'.format(n, p))

    # Reference run on the unperturbed (for 'rm': extended) medium.
    data = run_detailed(comm_id, community, medium_id, excluded_mets, env, False, min_mol_weight, ignore_coupling)

    for i in range(n):
        if do_all:
            changed = [modified[i]]
            new_id = "{}_{}".format(medium_id, modified[i])
        else:
            changed = sample(modified, p)
            new_id = "{}_{}".format(medium_id, i + 1)

        if sense == 'add':
            new_compounds = list(medium) + changed
        else:
            new_compounds = medium - set(changed)

        new_env = Environment.from_compounds(new_compounds, fmt_func=to_exchange_id, max_uptake=max_uptake)
        entries = run_detailed(comm_id, community, new_id, excluded_mets, new_env, False, min_mol_weight,
                               ignore_coupling)
        data.extend(entries)

    return data
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
             ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
             mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None,
             reference=None, ref_score=None, recursive_mode=False):
    """Reconstruct a model (or an ensemble of models) from one input and save it.

    Most failures are reported with ``print`` followed by an early return;
    unreadable input files raise ``IOError``.

    Args:
        inputfile (str): input path, or the identifier handed to
            ``download_ncbi_genome`` when ``input_type`` is ``'refseq'``
        input_type (str): ``'protein'``, ``'dna'``, ``'refseq'``, ``'eggnog'``
            or ``'diamond'``
        outputfile (str): output file; in recursive mode it is used as the
            output folder. Defaults to the input path with an ``.xml`` extension.
        diamond_args: extra arguments forwarded to ``run_blast``
        universe (str): name used to locate ``universe_<name>.xml.gz``
        universe_file (str): explicit universe model path (takes precedence)
        ensemble_size (int): build an ensemble when greater than 1
        verbose (bool): print progress messages
        debug (bool): when true, the model id is passed as ``debug_output``
        flavor (str): SBML flavor for the output (default taken from config)
        gapfill (str): comma-separated media ids to gap-fill for
        blind_gapfill (bool): gap-fill without reaction scores
        init (str): medium id used to initialize the environment
        mediadb (str): media library path (default taken from config)
        default_score, uptake_score, soft_score, ref_score: forwarded to
            ``carve_model``
        soft (str): path to a soft-constraints file
        hard (str): path to a hard-constraints file
        reference (str): path to a reference model
        recursive_mode (bool): derive the model id from the input file name
            and treat ``outputfile`` as a folder

    Raises:
        IOError: if a constraints file, the universe model, the reference
            model or the media library cannot be loaded.
        ValueError: if ``input_type`` is not recognized.
    """
    # --- Resolve model id and output location -------------------------------
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            # Only file-system errors are handled here; a bare except would
            # also swallow KeyboardInterrupt / SystemExit.
            print('Unable to create output folder:', outputfolder)
            return

    # --- Optional constraint files ------------------------------------------
    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    # --- Obtain gene annotations --------------------------------------------
    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file is then processed like a local genome.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    # --- Load universe, reference model and media library -------------------
    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError as err:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(
            f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}') from err

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loading failure is reported as IOError (as before), but
            # interpreter-level exits are no longer intercepted.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    # --- Score reactions ----------------------------------------------------
    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Keep only the gene-reaction rules of reactions present in the universe.
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            # Reconstruction continues without an initial environment.
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    # --- Build the model(s) -------------------------------------------------
    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        model = carve_model(universe_model, scores, inplace=(not gapfill), default_score=default_score,
                            uptake_score=uptake_score, soft_score=soft_score, soft_constraints=soft_constraints,
                            hard_constraints=hard_constraints, ref_model=ref_model, ref_score=ref_score,
                            init_env=init_env, debug_output=debug_output)

        # Check for failure before the model is used any further.
        if model is None:
            print("Failed to build model.")
            return

        annotate_genes(model, gene2gene, gene_annotations)

    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)
        # The ensemble is already saved and no single `model` exists, so the
        # single-model steps below must not run.
        return

    # --- Save, optionally after gap-filling ---------------------------------
    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)

    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:
            # Initializes environment again as new exchange reactions can be
            # acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbose=False):
    """Build an environment from a list of compounds and apply it to a model.

    Args:
        model: model the environment is applied to
        compounds: compounds that define the medium
        max_uptake (float): maximum uptake rate (default: 10)
        inplace (bool): forwarded to ``Environment.apply`` (default: False)
        verbose (bool): forwarded to ``Environment.apply`` as ``warning``

    Returns:
        whatever ``Environment.apply`` returns for the given ``inplace`` value
    """
    return Environment.from_compounds(compounds, max_uptake=max_uptake).apply(
        model, inplace=inplace, warning=verbose)
def mro_score(community, environment=None, direction=-1, min_mol_weight=False, min_growth=0.1, max_uptake=10,
              validate=False, verbose=True, use_lp=False, exclude=None):
    """
    Implements the metabolic resource overlap (MRO) score as defined in (Zelezniak et al, 2015).

    The merged community model is modified in place: the given environment
    and the community minimal medium are applied to it, and its
    ``biomass_reaction`` is reassigned for each organism in turn.

    Args:
        community (Community): microbial community model
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mol_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate per organism; it is multiplied by the
            number of organisms in the community (default: 10)
        validate (bool): passed on to the minimal medium computation (default: False)
        verbose (bool): emit a warning when no valid solution is found (default: True)
        use_lp (bool): when true, the minimal medium is computed with ``milp=False`` (default: False)
        exclude (iterable): compound ids to leave out of all reported media (default: None)

    Returns:
        tuple: ``(score, extras)``. ``score`` is the MRO score (``None`` when the average
        individual medium size is zero) and ``extras`` is a dict with the keys
        ``'community_medium'`` and ``'individual_media'``. ``(None, None)`` is returned
        when no valid solution is found.
    """
    exch_reactions = set(community.merged.get_exchange_reactions())
    max_uptake = max_uptake * len(community.organisms)

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)
        exch_reactions &= set(environment)

    medium, sol = minimal_medium(community.merged, exchange_reactions=exch_reactions, direction=direction,
                                 min_mass_weight=min_mol_weight, min_growth=min_growth, max_uptake=max_uptake,
                                 validate=validate, warnings=False, milp=(not use_lp))

    if sol.status != Status.OPTIMAL:
        if verbose:
            warn('MRO: Failed to find a valid solution for community')
        return None, None

    # Individual media are computed within the community's minimal medium.
    interacting_env = Environment.from_reactions(medium, max_uptake=max_uptake)
    interacting_env.apply(community.merged, inplace=True)

    # Accept any iterable (as mip_score does); set difference needs a set.
    exclude = set() if exclude is None else set(exclude)

    # Strips the 'R_EX_M_' prefix and '_e_pool' suffix from exchange ids.
    medium = {x[7:-7] for x in medium} - exclude
    individual_media = {}
    solver = solver_instance(community.merged)

    for org_id in community.organisms:
        biomass_reaction = community.organisms_biomass_reactions[org_id]
        community.merged.biomass_reaction = biomass_reaction
        org_interacting_exch = community.organisms_exchange_reactions[org_id]

        medium_i, sol = minimal_medium(community.merged, exchange_reactions=org_interacting_exch,
                                       direction=direction, min_mass_weight=min_mol_weight, min_growth=min_growth,
                                       max_uptake=max_uptake, validate=validate, solver=solver, warnings=False,
                                       milp=(not use_lp))

        if sol.status != Status.OPTIMAL:
            # Honor `verbose` here as well, like the community-level warning.
            if verbose:
                warn('MRO: Failed to find a valid solution for: ' + org_id)
            return None, None

        individual_media[org_id] = {
            org_interacting_exch[r].original_metabolite[2:-2] for r in medium_i
        } - exclude

    pairwise = {(o1, o2): individual_media[o1] & individual_media[o2]
                for o1, o2 in combinations(community.organisms, 2)}

    # Average overlap between pairs over average individual medium size.
    numerator = sum(map(len, pairwise.values())) / len(pairwise) if pairwise else 0
    denominator = sum(map(len, individual_media.values())) / len(individual_media) if individual_media else 0

    score = numerator / denominator if denominator != 0 else None
    extras = {'community_medium': medium, 'individual_media': individual_media}

    return score, extras
def mip_score(community, environment=None, min_mol_weight=False, min_growth=0.1, direction=-1, max_uptake=10,
              validate=False, verbose=True, use_lp=False, exclude=None):
    """
    Implements the metabolic interaction potential (MIP) score as defined in (Zelezniak et al, 2015).

    The score is the size of the minimal medium of the non-interacting community minus the
    size of the minimal medium of the interacting community.

    Args:
        community (Community): microbial community model
        environment (Environment): Metabolic environment in which the SMETANA score is calculated
        min_mol_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        max_uptake (float): maximum uptake rate per organism; it is multiplied by the
            number of organisms in the community (default: 10)
        validate (bool): validate solution using FBA (for debugging purposes, default: False)
        verbose (bool): emit a warning when no valid solution is found (default: True)
        use_lp (bool): when true, the minimal media are computed with ``milp=False`` (default: False)
        exclude (iterable): compound ids to leave out of both media before scoring (default: None)

    Returns:
        tuple: ``(score, extras)`` where ``extras`` is a dict with the keys
        ``'noninteracting_medium'`` and ``'interacting_medium'``; ``(None, None)`` when
        no valid solution is found.
    """
    isolated = community.copy(copy_models=False, interacting=False)
    candidate_rxns = set(community.merged.get_exchange_reactions())
    total_uptake = max_uptake * len(community.organisms)

    if environment:
        environment.apply(isolated.merged, inplace=True, warning=False)
        candidate_rxns &= set(environment)

    # Settings shared by both minimal medium computations.
    solver_options = dict(
        direction=direction,
        min_mass_weight=min_mol_weight,
        min_growth=min_growth,
        max_uptake=total_uptake,
        validate=validate,
        warnings=False,
        milp=(not use_lp),
    )

    noninteracting_medium, sol1 = minimal_medium(
        isolated.merged, exchange_reactions=candidate_rxns, **solver_options)

    if noninteracting_medium is None:
        if verbose:
            warn('MIP: Failed to find a valid solution for non-interacting community')
        return None, None

    # The interacting community may only draw on the nutrients that the
    # non-interacting community requires.
    restricted_env = Environment.from_reactions(noninteracting_medium, max_uptake=total_uptake)
    restricted_env.apply(community.merged, inplace=True)

    interacting_medium, sol2 = minimal_medium(
        community.merged, exchange_reactions=noninteracting_medium, **solver_options)

    if interacting_medium is None:
        if verbose:
            warn('MIP: Failed to find a valid solution for interacting community')
        return None, None

    if exclude is not None:
        ignored_rxns = {f'R_EX_M_{cpd}_e_pool' for cpd in exclude}
        interacting_medium = set(interacting_medium) - ignored_rxns
        noninteracting_medium = set(noninteracting_medium) - ignored_rxns

    score = len(noninteracting_medium) - len(interacting_medium)

    # Strips the 'R_EX_M_' prefix and '_e_pool' suffix from exchange ids.
    extras = {
        'noninteracting_medium': [rxn[7:-7] for rxn in noninteracting_medium],
        'interacting_medium': [rxn[7:-7] for rxn in interacting_medium],
    }

    return score, extras