def define_environment(medium, media_db, community, mode, aerobic, verbose, min_mol_weight, use_lp):
    """Choose the environment used to simulate a community.

    The uptake bound handed to every environment constructor is 10.0 times
    the number of organisms in ``community``. Three cases are tried in order:

    * ``medium`` is truthy: the environment is built from ``media_db[medium]``
      with compound ids formatted as ``R_EX_M_<id>_e_pool``; the medium id is
      ``medium`` itself.
    * ``mode == "global"``: a complete environment is built on
      ``community.merged`` (medium id ``'complete'``). When ``aerobic`` is not
      None the bounds of ``R_EX_M_o2_e_pool`` are overridden: uptake allowed
      if ``aerobic`` is truthy, blocked otherwise.
    * anything else: ``minimal_environment`` is called (medium id
      ``"minimal"``).

    Returns:
        A ``(medium_id, env)`` tuple.
    """
    uptake_limit = 10.0 * len(community.organisms)

    if medium:
        def to_pool_exchange(met_id):
            return "R_EX_M_{}_e_pool".format(met_id)

        named_env = Environment.from_compounds(media_db[medium], fmt_func=to_pool_exchange,
                                               max_uptake=uptake_limit)
        return medium, named_env

    if mode != "global":
        minimal_env = minimal_environment(community, aerobic, verbose=verbose, min_mol_weight=min_mol_weight,
                                          use_lp=use_lp, max_uptake=uptake_limit)
        return "minimal", minimal_env

    complete_env = Environment.complete(community.merged, max_uptake=uptake_limit)
    if aerobic is not None:
        # Explicit aerobic/anaerobic request: open or close oxygen uptake.
        oxygen_lower = -uptake_limit if aerobic else 0
        complete_env["R_EX_M_o2_e_pool"] = (oxygen_lower, inf)
    return 'complete', complete_env
def maincall(inputfiles, flavor=None, init=None, mediadb=None, outputfile=None):
    """Merge several single-organism models into one community model and save it.

    Args:
        inputfiles: iterable of model files, each loaded with ``load_cbmodel``.
        flavor: SBML flavor used for loading and saving; when falsy the
            ``sbml/default_flavor`` config entry is used.
        init: optional medium id; when truthy, the matching entry of the media
            library is applied to the merged model in place before saving.
        mediadb: media library file; when falsy the ``input/media_library``
            config entry (relative to ``project_dir``) is used. Only read
            when ``init`` is truthy.
        outputfile: destination file. Its base name without extension becomes
            the community id. When falsy, the id is ``'community'`` and the
            model is written to ``'community.xml'``.

    Raises:
        IOError: if the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        model_id, outputfile = 'community', 'community.xml'
    else:
        base_name = os.path.basename(outputfile)
        model_id = os.path.splitext(base_name)[0]

    models = []
    for path in inputfiles:
        models.append(load_cbmodel(path, flavor=flavor))

    merged = Community(model_id, models).merged_model

    if init:
        library_path = mediadb if mediadb else project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(library_path)
        except IOError:
            raise IOError('Failed to load media library:' + library_path)

        Environment.from_compounds(media_db[init]).apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def benchmark_biolog(model, medium, data, min_growth=0.1, max_uptake=10):
    """Classify simulated growth on single compounds against observed growth.

    The model is constrained with an environment built from ``medium``. For
    every row of ``data`` the exchange reaction ``R_EX_<bigg_id>_e`` is opened
    for uptake (bounds ``(-max_uptake, 0)``), an FBA is run, and the previous
    bounds are restored before moving on to the next compound.

    Args:
        model: model to simulate.
        medium: compounds passed to ``Environment.from_compounds``.
        data: table with ``bigg_id`` and ``growth`` columns; rows with a
            missing value in either column are dropped. A ``growth`` value of
            ``'++'`` or ``'+'`` counts as observed growth.
        min_growth: objective value that must be exceeded to call growth.
        max_uptake: uptake bound applied to the tested compound.

    Returns:
        dict mapping each ``bigg_id`` to ``'TP'``, ``'FP'``, ``'FN'`` or
        ``'TN'``. Compounds whose exchange reaction is not in the model are
        treated as no in silico growth.
    """
    env = Environment.from_compounds(medium)
    constraints = env.apply(model, inplace=False, warning=False)
    solver = solver_instance(model)

    data = data[["bigg_id", "growth"]].dropna()
    result = {}

    for _, row in data.iterrows():
        met = row["bigg_id"]
        in_vivo_growth = row["growth"] in {'++', '+'}
        r_id = f"R_EX_{met}_e"

        if r_id in model.reactions:
            # Remember the current bounds so the shared constraints dict can be
            # restored once this compound has been tested.
            previous = constraints[r_id] if r_id in constraints else (0, 0)
            constraints[r_id] = (-max_uptake, 0)
            sol = FBA(model, constraints=constraints, solver=solver)
            # The solve status must be compared explicitly: a bare
            # ``Status.OPTIMAL`` operand never inspects the solution, so a
            # non-optimal solve would be judged on ``fobj`` alone.
            in_silico_growth = sol.status == Status.OPTIMAL and sol.fobj > min_growth
            constraints[r_id] = previous
        else:
            in_silico_growth = False

        if in_silico_growth:
            result[met] = 'TP' if in_vivo_growth else 'FP'
        else:
            result[met] = 'FN' if in_vivo_growth else 'TN'

    return result
def benchmark_essentiality(model, medium, in_vivo):
    """Classify predicted gene essentiality against experimental calls.

    Args:
        model: model passed to ``essential_genes``.
        medium: compounds defining the environment; when None a complete
            environment is built for ``model`` instead.
        in_vivo: mapping of gene -> truthy if the gene is essential in vivo.

    Returns:
        dict mapping every gene of ``in_vivo`` to ``'TP'``, ``'FN'``, ``'FP'``
        or ``'TN'``, where "positive" means essential and the prediction is
        membership in the result of ``essential_genes`` (``min_growth=0.1``).
    """
    env = Environment.complete(model) if medium is None else Environment.from_compounds(medium)
    constraints = env.apply(model, inplace=False, warning=False)
    predicted = essential_genes(model, constraints=constraints, min_growth=0.1)

    # Keyed by (essential in vivo, essential in silico).
    labels = {
        (True, True): 'TP',
        (True, False): 'FN',
        (False, True): 'FP',
        (False, False): 'TN',
    }

    return {gene: labels[(bool(observed), gene in predicted)] for gene, observed in in_vivo.items()}
def run_abiotic(comm_id, sense, community, medium_id, excluded_mets, env, verbose, min_mol_weight, other_mets, n, p,
                ignore_coupling):
    """Run the detailed analysis on a medium and on abiotic perturbations of it.

    Candidate compounds are those in ``other_mets`` that are neither in the
    current medium nor in ``excluded_mets``.

    * ``sense == 'add'``: each perturbation adds candidate compounds to the
      medium.
    * ``sense == 'rm'``: ``2 * p`` candidates are first sampled and added to
      the medium (the baseline environment is rebuilt from this extended
      medium); each perturbation then removes some of those compounds again.

    Args:
        comm_id: community id forwarded to ``run_detailed``.
        sense: ``'add'`` or ``'rm'``.
        community: community object; ``len(community.organisms)`` scales the
            uptake bound (10.0 per organism).
        medium_id: id of the baseline medium; perturbed media are named
            ``<medium_id>_<compound>`` (systematic) or ``<medium_id>_<k>``
            (random, ``k`` starting at 1).
        excluded_mets: set of compounds never used for perturbation.
        env: baseline environment.
        verbose: print a progress line when truthy.
        min_mol_weight: forwarded to ``run_detailed``.
        other_mets: set of compounds available for perturbation.
        n: number of random perturbations; ``0`` means one systematic
            perturbation per candidate compound.
        p: number of compounds per random perturbation.
        ignore_coupling: forwarded to ``run_detailed``.

    Returns:
        The list returned by ``run_detailed`` for the baseline, extended with
        the entries of every perturbed medium.

    Raises:
        ValueError: if ``sense`` is neither ``'add'`` nor ``'rm'``.
        RuntimeError: if fewer than ``p`` candidate compounds remain.
    """
    # Fail fast on an unknown sense: the code below only binds the candidate
    # list for 'add' / 'rm', so anything else used to die with an
    # UnboundLocalError after work had already been done.
    if sense not in ('add', 'rm'):
        raise ValueError("Invalid perturbation sense: {!r} (expected 'add' or 'rm').".format(sense))

    def pool_exchange_id(x):
        return f"R_EX_M_{x}_e_pool"

    # x[7:-7] strips the 7-character "R_EX_M_" prefix and "_e_pool" suffix.
    medium = set(env.get_compounds(fmt_func=lambda x: x[7:-7]))
    max_uptake = 10.0 * len(community.organisms)

    modified = sorted(other_mets - (medium | excluded_mets))

    if sense == 'rm':
        # Seed the medium with extra compounds that can then be removed.
        # NOTE(review): if `sample` is random.sample it raises ValueError when
        # fewer than 2 * p candidates exist - confirm this is acceptable.
        n_extra_cpds = 2 * p
        modified = sample(modified, n_extra_cpds)
        medium = medium | set(modified)
        env = Environment.from_compounds(medium, fmt_func=pool_exchange_id, max_uptake=max_uptake)

    if len(modified) < p:
        raise RuntimeError(
            "Insufficient compounds ({}) to perform ({}) perturbations.".format(len(modified), p))

    do_all = n == 0
    if do_all:
        n = len(modified)
        if verbose:
            print('Running {} systematic abiotic perturbations with 1 compound...'.format(n))
    elif verbose:
        print('Running {} random abiotic perturbations with {} compounds...'.format(n, p))

    data = run_detailed(comm_id, community, medium_id, excluded_mets, env, False, min_mol_weight, ignore_coupling)

    for i in range(n):
        if do_all:
            changed = [modified[i]]
            new_id = "{}_{}".format(medium_id, modified[i])
        else:
            changed = sample(modified, p)
            new_id = "{}_{}".format(medium_id, i + 1)

        if sense == 'add':
            new_compounds = list(medium) + changed
        else:
            new_compounds = medium - set(changed)

        new_env = Environment.from_compounds(new_compounds, fmt_func=pool_exchange_id, max_uptake=max_uptake)
        entries = run_detailed(comm_id, community, new_id, excluded_mets, new_env, False, min_mol_weight,
                               ignore_coupling)
        data.extend(entries)

    return data
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
             ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False,
             init=None, mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None,
             reference=None, ref_score=None, recursive_mode=False):
    """Reconstruct a model (or an ensemble of models) from a genome or annotation file.

    Steps: resolve output path and model id, load optional constraint files,
    obtain gene annotations (diamond, eggnog, or precomputed diamond output),
    load the universe model, score reactions, carve the model, optionally
    gap-fill it, and save the result. Recoverable failures are reported with
    ``print`` and the function returns early.

    Args:
        inputfile: input file, or a genome accession when ``input_type`` is
            ``'refseq'``.
        input_type: ``'protein'``, ``'dna'``, ``'refseq'``, ``'eggnog'`` or
            ``'diamond'``.
        outputfile: output file; in ``recursive_mode`` it is treated as a
            folder and ``<model_id>.xml`` is written inside it. When falsy the
            output is the input path with its extension replaced by ``.xml``.
        diamond_args: extra arguments forwarded to ``run_blast``.
        universe: universe name used to build the universe file path when
            ``universe_file`` is not given.
        universe_file: explicit universe model file.
        ensemble_size: when greater than 1 an ensemble is built and saved
            instead of a single model.
        verbose: print progress messages.
        debug: when truthy, ``model_id`` is passed as ``debug_output`` to the
            scoring and carving steps.
        flavor: SBML flavor for saving; defaults to the ``sbml/default_flavor``
            config entry.
        gapfill: comma-separated media ids to gap-fill for.
        blind_gapfill: gap-fill without reaction scores.
        init: medium id used to initialise the model environment.
        mediadb: media library file; defaults to the ``input/media_library``
            config entry.
        default_score, uptake_score, soft_score, ref_score: forwarded to
            ``carve_model``.
        soft: soft-constraints file.
        hard: hard-constraints file.
        reference: reference model file.
        recursive_mode: derive the model id from the input file name (see
            ``outputfile``).

    Raises:
        IOError: if a constraints file, the universe model, the reference
            model or the media library cannot be loaded.
        ValueError: if ``input_type`` is not one of the supported values.
    """
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file:' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file:' + hard)
    else:
        hard_constraints = None

    if input_type == 'refseq':
        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # From here on the downloaded file is handled like a local genome.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')

        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}')

    if reference:
        if verbose:
            print('Loading reference model...')

        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer converted into an IOError.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:
        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            # Reported but not fatal: carving continues without an initial environment.
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is not None and ensemble_size > 1:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)

        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble path is complete here. Falling through would read the
        # unbound single-model variable and raise UnboundLocalError.
        if verbose:
            print('Done.')
        return

    if verbose:
        print('Reconstructing a single model')

    model = carve_model(universe_model, scores,
                        inplace=(not gapfill),
                        default_score=default_score,
                        uptake_score=uptake_score,
                        soft_score=soft_score,
                        soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints,
                        ref_model=ref_model,
                        ref_score=ref_score,
                        init_env=init_env,
                        debug_output=debug_output)

    # Check for a failed build before touching the model.
    if model is None:
        print("Failed to build model.")
        return

    annotate_genes(model, gene2gene, gene_annotations)

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)
    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)

        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:
            # Initializes environment again as new exchange reactions can be acquired during gap-filling
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbose=False):
    """Build an environment from ``compounds`` and apply it to ``model``.

    Args:
        model: model the environment is applied to.
        compounds: compounds passed to ``Environment.from_compounds``.
        max_uptake: uptake bound passed to ``Environment.from_compounds``.
        inplace: forwarded to ``Environment.apply``.
        verbose: forwarded to ``Environment.apply`` as its ``warning`` flag.

    Returns:
        Whatever ``Environment.apply`` returns for the given ``inplace`` flag.
    """
    environment = Environment.from_compounds(compounds, max_uptake=max_uptake)
    applied = environment.apply(model, inplace=inplace, warning=verbose)
    return applied