def benchmark_essentiality(model, medium, in_vivo_essential, in_vivo_non_essential=None, verbose=False,
                           ensemble=False, voting_thresholds=None, flavor=None):
    """ Compare predicted (in silico) gene essentiality against in vivo data.

    Args:
        model: model to evaluate; when *ensemble* is true this is the ensemble
            object and its ``.model`` attribute is used as the underlying model
        medium: compounds defining the growth medium, or None to use
            ``Environment.complete(model)``
        in_vivo_essential: experimentally essential genes (forwarded to the evaluator)
        in_vivo_non_essential: experimentally non-essential genes (forwarded to the evaluator)
        verbose (bool): forwarded to the evaluator
        ensemble (bool): treat *model* as an ensemble
        voting_thresholds: forwarded to ``ensemble_essentiality`` (ensemble mode only)
        flavor (str): 'seed' selects the 'EX_{}_e0' exchange id pattern,
            anything else selects 'R_EX_{}_e'

    Returns:
        the result of ``essentialtity_eval`` for a single model, or a list with
        one such result per entry returned by ``ensemble_essentiality``
    """
    ensemble_obj = None
    if ensemble:
        ensemble_obj = model
        model = ensemble_obj.model

    exchange_template = 'EX_{}_e0' if flavor == 'seed' else 'R_EX_{}_e'

    if medium is None:
        env = Environment.complete(model)
    else:
        # the template is wrapped in double quotes, as the original call does
        env = Environment.from_compounds(medium, exchange_format='"{}"'.format(exchange_template))

    constraints = env.apply(model, inplace=False, warning=False)

    if not ensemble_obj:
        predicted_essential = set(essential_genes(model, constraints=constraints, min_growth=0.1))
        predicted_non_essential = set(model.genes) - predicted_essential
        return essentialtity_eval(predicted_essential, predicted_non_essential,
                                  in_vivo_essential, in_vivo_non_essential, verbose)

    per_threshold = ensemble_essentiality(ensemble_obj, constraints, voting_thresholds, min_growth=0.1)
    return [
        essentialtity_eval(predicted_essential, set(model.genes) - predicted_essential,
                           in_vivo_essential, in_vivo_non_essential, verbose)
        for predicted_essential in per_threshold
    ]
def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbose=False, exchange_format=None):
    """ Build an environment from a list of compounds and apply it to a model.

    Args:
        model: model the environment is applied to
        compounds: compounds forwarded to ``Environment.from_compounds``
        max_uptake: maximum uptake rate forwarded to ``Environment.from_compounds`` (default: 10)
        inplace (bool): forwarded to ``Environment.apply`` (default: False)
        verbose (bool): forwarded to ``Environment.apply`` as its ``warning`` flag
        exchange_format (str): exchange reaction id pattern; falls back to
            "'R_EX_{}_e'" when empty or None

    Returns:
        whatever ``Environment.apply`` returns for the given *inplace* setting
    """
    fmt = exchange_format or "'R_EX_{}_e'"
    medium_env = Environment.from_compounds(compounds, max_uptake=max_uptake, exchange_format=fmt)
    return medium_env.apply(model, inplace=inplace, warning=verbose)
def __create_cmodel(self):
    """
    builds the merged community model from self.models and initialises its medium in place:
    Environment.empty is applied when self.empty_flag is set, Environment.complete otherwise
    :return: the merged model (the ``merged`` attribute of the Community built from self.models)
    """
    merged_model = Community(
        community_id=self.community_id,
        models=self.models,
        extracellular_compartment_id=self.extracellular_compartment_id,
        create_biomass=False,
        interacting=True).merged

    if self.empty_flag:
        Environment.empty(merged_model, inplace=True)
    else:
        Environment.complete(merged_model, inplace=True)

    return merged_model
def main(inputfiles, flavor=None, split_pool=False, no_biomass=False, init=None, mediadb=None, ext_comp_id=None,
         outputfile=None):
    """ Merge several single-species models into one community model and save it.

    Args:
        inputfiles: paths of the models to load and merge
        flavor (str): SBML flavor used for loading and saving
            (default: the 'default_flavor' entry of the 'sbml' config section)
        split_pool (bool): when true the extracellular compartments are NOT merged,
            and the pool exchange pattern "'R_EX_M_{}_e_pool'" is used for *init*
        no_biomass (bool): when true no community biomass reaction is created
        init (str): optional medium id; when given, that medium is looked up in the
            media library and applied to the merged model in place
        mediadb (str): media library path (default: project media library from config)
        ext_comp_id (str): extracellular compartment id (default: 'C_e')
        outputfile (str): output path (default: 'community.xml'); its base name,
            without extension, is used as the community model id

    Raises:
        IOError: if the media library cannot be loaded
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        model_id = os.path.splitext(os.path.basename(outputfile))[0]

    if ext_comp_id is None:
        ext_comp_id = 'C_e'

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    community = Community(model_id, models, extracellular_compartment_id=ext_comp_id,
                          merge_extracellular_compartments=not split_pool,
                          create_biomass=not no_biomass)
    merged = community.generate_merged_model()

    if init:
        mediadb = mediadb or (project_dir + config.get('input', 'media_library'))

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        exchange_format = "'R_EX_M_{}_e_pool'" if split_pool else "'R_EX_{}_e'"
        init_env = Environment.from_compounds(media_db[init], exchange_format=exchange_format)
        init_env.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def simulate_biolog(model, medium, source, compounds, main_compounds, max_uptake=10, min_growth=0.1, verbose=False,
                    add_transporters=False, add_sinks=False, ensemble=False, voting_thresholds=None, flavor=None):
    """ Predict growth on each compound of a Biolog-style plate.

    The base *medium* is applied, the default compounds for the tested *source*
    (``main_compounds[source]``) have their uptake blocked, and then every entry
    of *compounds* is opened for uptake in turn and growth is simulated.

    Args:
        model: model to simulate; when *ensemble* is true this is the ensemble
            object and its ``.model`` attribute is used as the underlying model
        medium: compounds of the base medium
        source: key into *main_compounds* selecting the element source being tested
        compounds: compounds to test one at a time
        main_compounds (dict): maps each source to the compounds whose uptake is blocked
        max_uptake (float): maximum uptake rate for the medium and the tested compound (default: 10)
        min_growth (float): growth rate above which growth is predicted (default: 0.1)
        verbose (bool): print the compounds that could not be tested
        add_transporters (bool): on a copy of the model, add an exchange reaction
            acting on the cytosolic metabolite when the extracellular one is missing
        add_sinks (bool): on a copy of the model, add a sink reaction for every
            cytosolic metabolite ('_c0' suffix for seed flavor, '_c' otherwise)
        ensemble (bool): treat *model* as an ensemble
        voting_thresholds (list): fractions of ensemble members that must grow
            (required in ensemble mode)
        flavor (str): 'seed' selects seed-style identifiers, anything else BiGG-style

    Returns:
        dict: compound -> prediction. For a single model the value is a bool; for
        an ensemble it is a list with one bool per voting threshold. Compounds
        without an exchange reaction in the model keep None (or a list of None).
    """
    if ensemble:
        ensemble = model
        model = ensemble.model

    is_seed = flavor == 'seed'
    ex_rxn_format = 'EX_{}_e0' if is_seed else 'R_EX_{}_e'

    env = Environment.from_compounds(medium, max_uptake=max_uptake, exchange_format='"{}"'.format(ex_rxn_format))
    constraints = env.apply(model, inplace=False, warning=False)

    # block uptake of the default compounds for the source being tested
    for cmpd in main_compounds[source]:
        constraints[ex_rxn_format.format(cmpd)] = (0, None)

    no_exchange = []
    not_in_model = []
    growth_pred = {}

    # metabolite ids stripped of compartment suffix (and of the 'M_' prefix for non-seed ids)
    if is_seed:
        model_mets = {m_id[:-3] for m_id in model.metabolites}
    else:
        model_mets = {m_id[2:-2] for m_id in model.metabolites}

    # never modify the caller's model; one copy is enough for both options
    if add_transporters or add_sinks:
        model = model.copy()

    if add_transporters:
        for met in compounds:
            met_e = ('{}_e0' if is_seed else 'M_{}_e').format(met)
            met_c = ('{}_c0' if is_seed else 'M_{}_c').format(met)
            if met_e not in model.metabolites and met_c in model.metabolites:
                if is_seed:
                    rxn_str = 'EX_{}: {} <-> [0, 0]'.format(met_e, met_c)
                else:
                    rxn_str = 'R_EX_{}_e: {} <-> [0, 0]'.format(met, met_c)
                model.add_reaction_from_str(rxn_str)

    if add_sinks:
        # seed cytosolic ids end in '_c0' (see met_c above); matching only '_c'
        # made this option a silent no-op for seed models
        sink_suffix = '_c0' if is_seed else '_c'
        for m_id in list(model.metabolites):
            if m_id.endswith(sink_suffix):
                model.add_reaction_from_str('Sink_{}: {} --> '.format(m_id, m_id))

    solver = solver_instance(model)

    for met in compounds:
        growth_pred[met] = [None] * len(voting_thresholds) if ensemble else None
        r_id = ex_rxn_format.format(met)

        if r_id not in model.reactions:
            if met in model_mets:
                no_exchange.append(met)
            else:
                not_in_model.append(met)
            continue

        # open uptake of the tested compound only for this simulation
        previous = constraints[r_id] if r_id in constraints else (0, 0)
        constraints[r_id] = (-max_uptake, 0)

        if ensemble:
            growth_all = simulate_ensemble(ensemble, constraints=constraints, solver=solver, get_fluxes=False)
            growth_bool = [rate is not None and rate > min_growth for rate in growth_all]
            fraction = sum(growth_bool) / float(ensemble.size)
            growth_pred[met] = [fraction >= t for t in voting_thresholds]
        else:
            sol = FBA(model, constraints=constraints, solver=solver)
            if sol.status == Status.OPTIMAL:
                growth_pred[met] = sol.fobj > min_growth
            else:
                growth_pred[met] = False

        constraints[r_id] = previous

    # single-argument print() emits the same text under Python 2 and Python 3
    if verbose and no_exchange:
        print('No exchange reactions in model: ' + ' '.join(sorted(no_exchange)))

    if verbose and not_in_model:
        print('Metabolites not in model: ' + ' '.join(sorted(not_in_model)))

    return growth_pred
def mro_score(community, environment=None, direction=-1, min_mass_weight=False, min_growth=1, max_uptake=100,
              validate=False):
    """
    Implements the metabolic resource overlap (MRO) score as defined in (Zelezniak et al, 2015).

    Args:
        community (Community): microbial community model
        environment (Environment): metabolic environment in which the score is calculated
            (optional; when omitted only the computed minimal medium is used)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 1)
        max_uptake (float): maximum uptake rate (default: 100)
        validate (bool): forwarded to minimal_medium (default: False)

    Returns:
        tuple: (score, extras). score is a float, or None when the denominator is zero
        (e.g. fewer than two organisms or empty individual media). extras is a dict with
        the keys 'noninteracting_medium', 'individual_media', 'pairwise' and 'solutions'.

    Raises:
        RuntimeError: if no optimal minimal medium is found for the non-interacting
            community or for any individual organism
    """
    # TODO: 1_program_cloneModels.prof
    inter_community = community.copy(copy_models=False, interacting=True, merge_extracellular_compartments=False,
                                     create_biomass=False)
    indep_community = inter_community.copy(copy_models=False, interacting=False, create_biomass=True)

    exch_reactions = set(inter_community.merged.get_exchange_reactions()) - {inter_community.merged.biomass_reaction}

    if environment:
        environment.apply(inter_community.merged, inplace=True)
        environment.apply(indep_community.merged, inplace=True)
        exch_reactions = exch_reactions - set(environment)

    noninteracting_medium, sol = minimal_medium(indep_community.merged, exchange_reactions=exch_reactions,
                                                direction=direction, min_mass_weight=min_mass_weight,
                                                min_growth=min_growth, max_uptake=max_uptake, validate=validate)
    solutions = [sol]

    if sol.status != Status.OPTIMAL:
        raise RuntimeError('Failed to find a valid solution')

    # anabiotic environment is limited to non-interacting community minimal media
    noninteracting_exch = set(noninteracting_medium)
    # environment is optional: set(None) would raise TypeError, so fall back to an empty set
    environment_rxns = set(environment) if environment else set()
    minimal_medium_set = noninteracting_exch | environment_rxns
    indep_environment = Environment.from_reactions(minimal_medium_set, max_uptake=max_uptake)
    indep_environment.apply(inter_community.merged, inplace=True)

    individual_media = {}

    for org_id in inter_community.organisms:
        # optimise each organism in turn by making its biomass reaction the community objective
        biomass_reaction = inter_community.organisms_biomass_reactions[org_id]
        inter_community.merged.biomass_reaction = biomass_reaction

        org_noninteracting_exch = inter_community.organisms_exchange_reactions[org_id]

        medium, sol = minimal_medium(inter_community.merged, exchange_reactions=org_noninteracting_exch,
                                     direction=direction, min_mass_weight=min_mass_weight,
                                     min_growth=min_growth, max_uptake=max_uptake, validate=validate)
        solutions.append(sol)

        if sol.status != Status.OPTIMAL:
            raise RuntimeError('Failed to find a valid solution')

        individual_media[org_id] = {org_noninteracting_exch[r].original_metabolite for r in medium}

    pairwise = {(o1, o2): individual_media[o1] & individual_media[o2]
                for o1, o2 in combinations(community.organisms, 2)}

    numerator = len(individual_media) * sum(map(len, pairwise.values()))
    denominator = float(len(pairwise) * sum(map(len, individual_media.values())))

    score = numerator / denominator if denominator != 0 else None

    extras = {
        'noninteracting_medium': noninteracting_medium,
        'individual_media': individual_media,
        'pairwise': pairwise,
        'solutions': solutions,
    }

    return score, extras
def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
         ref_score=None, recursive_mode=False, specified_solver=None, feas_tol=None, opt_tol=None, int_feas_tol=None):
    """ Reconstruct a model (or an ensemble of models) from a genome or annotation file.

    Pipeline: resolve output paths, load optional soft/hard constraints, obtain
    annotations (download + diamond, diamond results, or eggnog), load the
    universe model, score reactions, carve the model and optionally gap-fill it.
    Recoverable problems are reported with print() and the function returns None.

    Args:
        inputfile (str): input file, or an NCBI accession for 'refseq'/'genbank'
        input_type (str): 'protein', 'dna', 'refseq', 'genbank', 'eggnog' or 'diamond'
        outputfile (str): output file (an output folder when *recursive_mode* is set);
            defaults to the input path with an '.xml' extension
        diamond_args: extra arguments forwarded to run_blast
        universe (str): universe name used to build the universe file path
        universe_file (str): explicit universe file (takes precedence over *universe*)
        ensemble_size (int): build an ensemble when greater than 1
        verbose (bool): print progress messages
        debug (bool): when set, the model id is passed as debug_output to scoring/carving
        flavor (str): SBML flavor for the output (default: config 'sbml'/'default_flavor')
        gapfill (str): comma-separated media ids to gap-fill for (single model only;
            ignored when an ensemble is built)
        blind_gapfill (bool): gap-fill without reaction scores
        init (str): medium id used to initialise the model environment
        mediadb (str): media library path (default: project media library from config)
        default_score, uptake_score, soft_score, ref_score: forwarded to carve_model
        soft (str): soft-constraints file
        hard (str): hard-constraints file
        reference (str): reference model file
        recursive_mode (bool): derive the model id from the input file name
        specified_solver (str): solver to set as default when it differs from the configured one
        feas_tol, opt_tol, int_feas_tol: solver tolerances, set as default parameters when not None

    Raises:
        IOError: if a constraints file, the universe model, the reference model or
            the media library cannot be loaded
        ValueError: if *input_type* is not supported
    """
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = '{}/{}.xml'.format(outputfile, model_id)
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            # only filesystem errors are expected here; do not swallow KeyboardInterrupt etc.
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file:' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file:' + hard)
    else:
        hard_constraints = None

    if input_type == 'refseq' or input_type == 'genbank':

        if verbose:
            print('Downloading genome {} from NCBI...'.format(inputfile))

        ncbi_table = load_ncbi_table(project_dir + config.get('ncbi', input_type))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # the downloaded file type decides how diamond is run below
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('input', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    # change default solver if a solver is specified in the input
    if specified_solver is not None:
        if specified_solver != config.get('solver', 'default_solver'):
            set_default_solver(specified_solver)

    params_to_set = {'FEASIBILITY_TOL': feas_tol,
                     'OPTIMALITY_TOL': opt_tol,
                     'INT_FEASIBILITY_TOL': int_feas_tol}
    for key, value in params_to_set.items():
        if value is not None:
            set_default_parameter(getattr(Parameter, key), value)

    try:
        universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
        raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available))

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception:
            # any loading/parsing failure is reported as an IOError, as before
            raise IOError('Failed to load reference model.')
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

    if verbose:
        print('Scoring reactions...')

    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # keep only the GPR rules of reactions present in the universe
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print('Error: medium {} not in media database.'.format(init))

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    # `model` is only produced by the single-model + gapfill path; bind it up front
    # so the gap-filling check below cannot raise NameError when an ensemble is built
    model = None

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        if not gapfill:
            # no post-processing needed: carve_model writes the output file itself
            carve_model(universe_model, scores,
                        outputfile=outputfile,
                        flavor=flavor,
                        default_score=default_score,
                        uptake_score=uptake_score,
                        soft_score=soft_score,
                        soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints,
                        ref_model=ref_model,
                        ref_score=ref_score,
                        init_env=init_env,
                        debug_output=debug_output)
        else:
            model = carve_model(universe_model, scores,
                                inplace=False,
                                default_score=default_score,
                                uptake_score=uptake_score,
                                soft_score=soft_score,
                                soft_constraints=soft_constraints,
                                hard_constraints=hard_constraints,
                                ref_model=ref_model,
                                ref_score=ref_score,
                                init_env=init_env,
                                debug_output=debug_output)
    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        build_ensemble(universe_model, scores, ensemble_size, outputfile, flavor, init_env=init_env)

    if gapfill and model is not None:

        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print('Gap filling for {}...'.format(', '.join(media)))

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)

        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))

        if init_env:
            # Should initialize environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
def mro_score(community, environment=None, direction=-1, min_mass_weight=False, min_growth=0.1, max_uptake=10,
              validate=False, verbose=True, exclude=None):
    """
    Implements the metabolic resource overlap (MRO) score as defined in (Zelezniak et al, 2015).

    Note: environments are applied to ``community.merged`` in place and its
    ``biomass_reaction`` is reassigned while iterating over the organisms.

    Args:
        community (Community): microbial community model
        environment (Environment): metabolic environment in which the score is calculated (optional)
        direction (int): direction of uptake reactions (negative or positive, default: -1)
        min_mass_weight (bool): minimize by molecular weight of nutrients (default: False)
        min_growth (float): minimum growth rate (default: 0.1)
        max_uptake (float): maximum uptake rate (default: 10)
        validate (bool): forwarded to minimal_medium (default: False)
        verbose (bool): emit warnings when no valid solution is found (default: True)
        exclude (list): compound ids to leave out of the individual media; each is
            expanded to 'M_<id>_e' before comparison (default: None)

    Returns:
        tuple: (score, extras). score is a float, or None when the denominator is zero.
        extras is a dict with the keys 'noninteracting_medium', 'individual_media',
        'pairwise' and 'solutions'. (None, None) is returned when no optimal minimal
        medium is found for the non-interacting community or for any organism.
    """
    noninteracting = community.copy(copy_models=False, interacting=False, create_biomass=True)
    exch_reactions = set(community.merged.get_exchange_reactions())

    if environment:
        environment.apply(community.merged, inplace=True, warning=False)
        environment.apply(noninteracting.merged, inplace=True, warning=False)
        exch_reactions &= set(environment)

    noninteracting_medium, sol = minimal_medium(
        noninteracting.merged,
        exchange_reactions=exch_reactions,
        direction=direction,
        min_mass_weight=min_mass_weight,
        min_growth=min_growth,
        max_uptake=max_uptake,
        validate=validate,
        warnings=verbose)

    solutions = [sol]

    if sol.status != Status.OPTIMAL:
        if verbose:
            warn('MRO: Failed to find a valid solution for non-interacting community')
        return None, None

    # anabiotic environment is limited to non-interacting community minimal media
    noninteracting_exch = set(noninteracting_medium)
    noninteracting_env = Environment.from_reactions(noninteracting_exch, max_uptake=max_uptake)
    noninteracting_env.apply(community.merged, inplace=True)

    individual_media = {}

    if exclude is not None:
        exclude = {'M_{}_e'.format(x) for x in exclude}
    else:
        # must be a set: `set - {}` subtracts a dict and raises TypeError
        exclude = set()

    # one solver instance is shared by all per-organism optimisations
    solver = solver_instance(community.merged)

    for org_id in community.organisms:
        # optimise each organism in turn by making its biomass reaction the community objective
        biomass_reaction = community.organisms_biomass_reactions[org_id]
        community.merged.biomass_reaction = biomass_reaction

        org_noninteracting_exch = community.organisms_exchange_reactions[org_id]

        medium, sol = minimal_medium(
            community.merged,
            exchange_reactions=org_noninteracting_exch,
            direction=direction,
            min_mass_weight=min_mass_weight,
            min_growth=min_growth,
            max_uptake=max_uptake,
            validate=validate,
            solver=solver,
            warnings=verbose)

        solutions.append(sol)

        if sol.status != Status.OPTIMAL:
            # honour `verbose` here as well, like the non-interacting check above
            if verbose:
                warn('MRO: Failed to find a valid solution for: ' + org_id)
            return None, None

        individual_media[org_id] = {
            org_noninteracting_exch[r].original_metabolite for r in medium
        } - exclude

    pairwise = {(o1, o2): individual_media[o1] & individual_media[o2]
                for o1, o2 in combinations(community.organisms, 2)}

    numerator = len(individual_media) * sum(map(len, pairwise.values()))
    denominator = float(len(pairwise) * sum(map(len, individual_media.values())))

    score = numerator / denominator if denominator != 0 else None

    extras = {
        'noninteracting_medium': noninteracting_medium,
        'individual_media': individual_media,
        'pairwise': pairwise,
        'solutions': solutions,
    }

    return score, extras