def maincall(inputfiles, flavor=None, init=None, mediadb=None, outputfile=None):

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    if outputfile:
        model_id = os.path.splitext(os.path.basename(outputfile))[0]
    else:
        model_id = 'community'
        outputfile = 'community.xml'

    models = [load_cbmodel(inputfile, flavor=flavor) for inputfile in inputfiles]

    community = Community(model_id, models)
    model = community.merged_model

    if init:
        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library: ' + mediadb)

        init_env = Environment.from_compounds(media_db[init])
        init_env.apply(model, inplace=True)

    save_cbmodel(model, outputfile, flavor=flavor)
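# Example (a sketch with hypothetical file names, not part of the original code):
# merge two single-species models into one community model, initialized with the
# "M9" medium from the default media library:
#
#   maincall(['ecoli.xml', 'bsubtilis.xml'], init='M9', outputfile='community.xml')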
def compute_bigg_gibbs_energy(modelfile, equi_cmpds_file, outputfile=None):
    """ Calculate standard Gibbs energies for reactions in a model (as many as possible) using eQuilibrator.

    Args:
        modelfile (str): SBML file
        equi_cmpds_file (str): file containing KEGG compounds accepted by eQuilibrator
        outputfile (str): output CSV file (optional)

    Returns:
        dict: standard Gibbs energies indexed by reaction ids
        dict: estimation error indexed by reaction ids
    """
    model = load_cbmodel(modelfile)
    kegg_compounds = pd.read_csv(equi_cmpds_file, sep='\t')
    kegg_compounds = set(kegg_compounds['compound_id'])

    dG0, sdG0 = calculate_deltaG0s(model, kegg_compounds, pH=default_pH, I=default_I, T=default_T)

    if outputfile:
        data = pd.DataFrame({'dG0': dG0, 'sdG0': sdG0})
        data.to_csv(outputfile)
    else:
        return dG0, sdG0
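# Example (hypothetical paths): keep the estimates in memory instead of writing
# a CSV file:
#
#   dG0, sdG0 = compute_bigg_gibbs_energy('universe_draft.xml.gz',
#                                         'equilibrator_compounds.tsv')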
def __init__(self, model):
    try:
        # NOTE: hard-coded, machine-specific Windows path; replace with a
        # configurable model directory before reusing this code.
        self.model = load_cbmodel(
            "C:\\Users\\Pedro\\OneDrive\\Documentos\\UMinho\\WholeNewProject\\PythonThings\\Models\\" + model)
        self.useReframed = True
    except Exception:
        # Fall back to the custom XML parser when reframed fails to load the model.
        self.model = ProcessXML(model)
        self.model.getCompartments()
        self.model.getSpecies()
        self.model.getReactions()
        self.useReframed = False
def curate(inputfile=None, outputfile=None, taxa=None, biomass=None, biomass_db_path=None,
           normalize_biomass=False):

    if inputfile:
        universe_draft = inputfile
        model_specific_data = os.path.splitext(inputfile)[0] + '.csv'
    else:
        universe_draft = project_dir + config.get('generated', 'bigg_universe')
        model_specific_data = project_dir + config.get('generated', 'model_specific_data')

    if not biomass:
        biomass = 'gramneg' if taxa == 'cyanobacteria' else taxa

    if not outputfile:
        folder = project_dir + config.get('generated', 'folder')
        outputfile = folder + f"universe_{taxa}.xml.gz"

    bigg_models = project_dir + config.get('input', 'bigg_models')
    bigg_models = pd.read_csv(bigg_models, sep='\t')

    manual_curation = project_dir + config.get('input', 'manually_curated')
    manually_curated = pd.read_csv(manual_curation, index_col=0, sep='\t')

    unbalanced = project_dir + config.get('input', 'unbalanced_metabolites')
    unbalanced = pd.read_csv(unbalanced, header=None)
    unbalanced = unbalanced[0].tolist()

    try:
        model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
        model_specific_data = pd.read_csv(model_specific_data)
    except IOError:
        raise IOError('Universe draft model not found. Please run --build first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = project_dir + config.get('input', 'biomass_library')

    biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

    if biomass not in biomass_db:
        valid_ids = ','.join(biomass_db.keys())
        raise RuntimeError('Biomass identifier not in database. Currently in database: ' + valid_ids)

    biomass_eq = biomass_db[biomass]

    curate_universe(model,
                    outputfile=outputfile,
                    taxa=taxa,
                    biomass_eq=biomass_eq,
                    model_specific_data=model_specific_data,
                    bigg_models=bigg_models,
                    manually_curated=manually_curated,
                    unbalanced_metabolites=unbalanced)
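# Example (a sketch; input paths fall back to the configured defaults): build a
# curated cyanobacteria universe, normalizing the biomass composition by weight:
#
#   curate(taxa='cyanobacteria', normalize_biomass=True)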
def maincall(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
             biomass=None, biomass_db_path=None, normalize_biomass=False, taxa=None, outputfile=None):

    if mode == 'draft':

        if outputfile:
            universe_draft = outputfile
            model_specific_data = os.path.splitext(outputfile)[0] + '.csv'
            bigg_gprs = os.path.splitext(outputfile)[0] + '_gprs.csv'
            # fasta_file = os.path.splitext(outputfile)[0] + '.faa'
        else:
            universe_draft = project_dir + config.get('generated', 'universe_draft')
            model_specific_data = project_dir + config.get('generated', 'model_specific_data')
            bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
            # fasta_file = project_dir + config.get('input', 'fasta_file')

        build_bigg_universe_model(universe_draft)
        data = download_model_specific_data(model_specific_data)
        gprs = create_gpr_table(data, outputfile=bigg_gprs)
        # download_gene_sequences(gprs, outputfile=fasta_file)

    elif mode == 'thermo':
        universe_draft = project_dir + config.get('generated', 'universe_draft')
        equilibrator_compounds = project_dir + config.get('input', 'equilibrator_compounds')

        if outputfile:
            bigg_gibbs = outputfile
        else:
            bigg_gibbs = project_dir + config.get('generated', 'bigg_gibbs')

        compute_bigg_gibbs_energy(universe_draft, equilibrator_compounds, bigg_gibbs)

    elif mode == 'curated':
        universe_draft = project_dir + config.get('generated', 'universe_draft')
        model_specific_data = project_dir + config.get('generated', 'model_specific_data')

        if not biomass:
            if taxa == 'archaea':
                biomass = 'archaea'
            else:
                biomass = config.get('universe', 'default_biomass')

        if outputfile:
            universe_final = outputfile
        else:
            tag = taxa if taxa != 'bacteria' else biomass
            universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

        bigg_models = project_dir + config.get('input', 'bigg_models')
        bigg_models = pd.read_csv(bigg_models, index_col=0)

        manual_curation = project_dir + config.get('input', 'manually_curated')
        manually_curated = pd.read_csv(manual_curation, index_col=0)

        unbalanced = project_dir + config.get('input', 'unbalanced_metabolites')
        unbalanced = pd.read_csv(unbalanced, header=None)
        unbalanced = unbalanced[0].tolist()

        try:
            model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
            model_specific_data = pd.read_csv(model_specific_data)
        except IOError:
            raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')

        if biomass_db_path is None:
            biomass_db_path = project_dir + config.get('input', 'biomass_library')

        biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

        if biomass not in biomass_db:
            valid_ids = ','.join(biomass_db.keys())
            raise RuntimeError('Biomass identifier not in database. Currently in database: ' + valid_ids)

        biomass_eq = biomass_db[biomass]

        if nothermo:
            thermodynamics_data = None
            metabolomics_data = None
        else:
            try:
                bigg_gibbs = project_dir + config.get('generated', 'bigg_gibbs')
                thermodynamics_data = pd.read_csv(bigg_gibbs, index_col=0)
            except IOError:
                raise IOError('Thermodynamic data not found. Please run --thermo first to generate thermodynamic data.')

            metabolomics = project_dir + config.get('input', 'metabolomics')
            metabolomics_data = pd.read_csv(metabolomics, index_col=1)

        curate_universe(model,
                        taxa=taxa,
                        outputfile=universe_final,
                        model_specific_data=model_specific_data,
                        bigg_models=bigg_models,
                        thermodynamics_data=thermodynamics_data,
                        metabolomics_data=metabolomics_data,
                        manually_curated=manually_curated,
                        unbalanced_metabolites=unbalanced,
                        biomass_eq=biomass_eq,
                        use_heuristics=(not noheuristics),
                        remove_unbalanced=(not allow_unbalanced),
                        remove_blocked=(not allow_blocked))

    else:
        print('Unrecognized option:', mode)
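# Example (a sketch of the three-stage pipeline; all paths come from config):
#
#   maincall('draft')                     # download BiGG data, build the universe draft
#   maincall('thermo')                    # estimate Gibbs energies with eQuilibrator
#   maincall('curated', taxa='bacteria')  # generate the curated universe model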
def maincall(inputfile, media, mediadb=None, universe=None, universe_file=None, outputfile=None,
             flavor=None, spent=None, verbose=False):

    if verbose:
        print('Loading model...')

    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model: ' + inputfile)

    if spent:
        if verbose:
            print('Loading model for spent medium species...')

        try:
            spent_model = load_cbmodel(spent, flavor=flavor)
        except IOError:
            raise IOError('Failed to load model: ' + spent)
    else:
        spent_model = None

    if verbose:
        print('Loading reaction universe...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml".format(project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='cobra')
    except IOError:
        if universe:
            raise IOError('Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe))
        else:
            raise IOError('Failed to load universe model: ' + universe_file)

    if verbose:
        print('Loading media...')

    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')

    try:
        media_db = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database: ' + mediadb)

    if verbose:
        m1, n1 = len(model.metabolites), len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    max_uptake = config.getint('gapfill', 'max_uptake')
    multiGapFill(model, universe_model, media, media_db, max_uptake=max_uptake, inplace=True,
                 spent_model=spent_model)

    if verbose:
        m2, n2 = len(model.metabolites), len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))

    if verbose:
        print('Saving SBML file...')

    if not outputfile:
        outputfile = os.path.splitext(inputfile)[0] + '_gapfill.xml'

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
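# Example (hypothetical file name): gap-fill a draft model for growth on two
# media from the default media library:
#
#   maincall('ecoli_draft.xml', media=['M9', 'LB'], verbose=True)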
def load_models():
    # NOTE: assumes module-level globals `organisms` (an iterable of organism
    # ids) and `data_path` (the folder containing the SBML models).
    models = {}
    for org_id in organisms:
        models[org_id] = load_cbmodel(f"{data_path}/models/{org_id}.xml", flavor='bigg')
    return models
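# Example (a sketch, assuming the globals above are defined):
#
#   models = load_models()
#   for org_id, model in models.items():
#       print(org_id, len(model.reactions))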
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None,
             universe_file=None, ensemble_size=None, verbose=False, debug=False, flavor=None,
             gapfill=None, blind_gapfill=False, init=None, mediadb=None, default_score=None,
             uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
             ref_score=None, recursive_mode=False):

    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]

        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))

    if not os.path.exists(outputfolder):
        try:
            os.makedirs(outputfolder)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError:
            raise IOError('Failed to load soft-constraints file: ' + soft)
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError:
            raise IOError('Failed to load hard-constraints file: ' + hard)
    else:
        hard_constraints = None

    if input_type == 'refseq':

        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')

        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}')

    if reference:
        if verbose:
            print('Loading reference model...')
        try:
            ref_model = load_cbmodel(reference)
        except Exception:
            raise IOError('Failed to load reference model.')
    else:
        ref_model = None

    if gapfill or init:

        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library: ' + mediadb)

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        model = carve_model(universe_model, scores,
                            inplace=(not gapfill),
                            default_score=default_score,
                            uptake_score=uptake_score,
                            soft_score=soft_score,
                            soft_constraints=soft_constraints,
                            hard_constraints=hard_constraints,
                            ref_model=ref_model,
                            ref_score=ref_score,
                            init_env=init_env,
                            debug_output=debug_output)
        annotate_genes(model, gene2gene, gene_annotations)
    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)
        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)
        # Stop here: the single-model code below would otherwise run with `model` undefined.
        return

    if model is None:
        print("Failed to build model.")
        return

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)
    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)

        multiGapFill(model, universe_model, media, media_db, scores=scores,
                     max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:
            # Initialize the environment again, as new exchange reactions may have
            # been acquired during gap-filling.
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
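# Example (hypothetical input file): carve a single model from a protein FASTA
# file and gap-fill it for growth on M9 medium:
#
#   maincall('genome.faa', input_type='protein', gapfill='M9', verbose=True)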
def simulate(wrappers, medium, method):
    """
    Run a SteadyCom community simulation.

    Parameters
    ----------
    wrappers: list(storage.ModelWrapper)
        A list of model wrappers containing cobrapy model instances.
    medium: list(str)
        A list of compound names. Exchange reaction identifiers are assumed
        to be formatted according to: "EX_{compound}_e"
    method: str
        The community simulation method. Currently accepted strings:
        "steadycom" or "steadiercom".
    """
    if method not in METHODS:
        raise ValueError(f"Unsupported community simulation method '{method}'")

    with warnings.catch_warnings(record=True) as reframed_warnings:

        logger.debug("Converting cobrapy models to reframed models")
        rf_models = []
        for model in [wrapper.model for wrapper in wrappers]:
            # The most functional approach (albeit slow) seems to be to write and
            # reload SBML. reframed's cobrapy integration is currently pretty
            # minimal.
            with tempfile.NamedTemporaryFile() as file_:
                cobra.io.write_sbml_model(model, file_.name)
                # TODO: Consider accepting the flavor argument as a parameter
                #   instead of always assuming BiGG.
                rf_models.append(reframed.load_cbmodel(file_.name, flavor="bigg"))

        logger.debug("Merging individual models to a community")
        community = reframed.Community("community", rf_models)

        logger.debug("Applying medium to the community")
        environment = reframed.Environment.from_compounds(
            medium, fmt_func=lambda x: f"R_EX_M_{x}_e")
        environment.apply(community.merged_model, inplace=True)

        if method == "steadycom":
            logger.info("Simulating community model with SteadyCom")
            solution = reframed.SteadyCom(community)
        elif method == "steadiercom":
            logger.info("Simulating community model with SteadierCom")
            solution = reframed.SteadierCom(community)

        logger.debug("Formatting solution response")

        def model_id(original_id):
            """Map the models original name back to our platform internal DB IDs."""
            return next(wrapper.id for wrapper in wrappers
                        if wrapper.model.id == original_id)

        # Calculate transactions (cross-feeding, uptake and secretion)
        logger.debug("Calculating transactions (cross-feeding, uptake and secretion)")
        ex_met_ids = solution.community.merged_model.get_external_metabolites()
        met_id2name = create_metabolite_id2name_mapping(ex_met_ids, community)
        transactions = generate_transactions(met_id2name, solution.exchange_map)

        # Convert the iterables to dictionaries for easier handling on the frontend
        abundance = [{
            "id": model_id(original_id),
            "value": value,
        } for original_id, value in solution.abundance.items()]

        cross_feeding = []
        for transaction in transactions:
            if transaction[0] == "medium":
                cross_feeding.append({
                    "from": "medium",
                    "to": model_id(transaction[1]),
                    "metabolite_id": transaction[2],
                    "metabolite_name": transaction[3],
                    "value": transaction[4],
                })
            elif transaction[1] == "medium":
                cross_feeding.append({
                    "from": model_id(transaction[0]),
                    "to": "medium",
                    "metabolite_id": transaction[2],
                    "metabolite_name": transaction[3],
                    "value": transaction[4],
                })
            else:
                cross_feeding.append({
                    "from": model_id(transaction[0]),
                    "to": model_id(transaction[1]),
                    "metabolite_id": transaction[2],
                    "metabolite_name": transaction[3],
                    "value": transaction[4],
                })

    return {
        "growth_rate": solution.growth,
        "abundance": abundance,
        "cross_feeding": cross_feeding,
        "warnings": [" ".join(w.message.args) for w in reframed_warnings],
    }
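# Example (a sketch; assumes `wrappers` holds storage.ModelWrapper objects whose
# cobrapy models use BiGG-style "EX_{compound}_e" exchange identifiers, and the
# medium compounds below are illustrative BiGG ids):
#
#   result = simulate(wrappers, medium=["glc__D", "nh4", "pi", "o2"],
#                     method="steadycom")
#   print(result["growth_rate"], result["abundance"])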