def maincall(inputfiles, flavor=None, init=None, mediadb=None, outputfile=None):
    """Merge single-species models into one community model and save it.

    Args:
        inputfiles: paths of the single-species SBML models to load.
        flavor: SBML flavor used for loading and saving; when empty the
            configured ``sbml/default_flavor`` is used.
        init: optional medium identifier; when given, the environment built
            from that entry of the media library is applied to the merged model.
        mediadb: optional media library path; defaults to the configured
            ``input/media_library`` below the project directory.
        outputfile: output SBML path; defaults to ``community.xml`` (and the
            community id to ``community``).

    Raises:
        IOError: if the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        community_id = 'community'
        outputfile = 'community.xml'
    else:
        # The community id is the output file name without folder or extension.
        community_id = os.path.splitext(os.path.basename(outputfile))[0]

    species_models = []
    for path in inputfiles:
        species_models.append(load_cbmodel(path, flavor=flavor))

    merged = Community(community_id, species_models).merged_model

    if init:
        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')
        try:
            media_library = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)
        environment = Environment.from_compounds(media_library[init])
        environment.apply(merged, inplace=True)

    save_cbmodel(merged, outputfile, flavor=flavor)
def first_run_check():
    """Build the internal diamond database when it does not exist yet.

    Nothing happens if the configured database file is already present.
    Otherwise ``diamond makedb`` is run on the configured fasta file; failures
    are reported on stdout and never raised.
    """
    db_path = project_dir + config.get('generated', 'diamond_db')
    if os.path.exists(db_path):
        return

    print("Running diamond for the first time, please wait while we build the internal database...")
    sequences = project_dir + config.get('generated', 'fasta_file')
    # The database name is passed without its last five characters
    # (presumably the ".dmnd" extension -- confirm against the configuration).
    makedb_cmd = ['diamond', 'makedb', '--in', sequences, '-d', db_path[:-5]]

    try:
        status = subprocess.call(makedb_cmd)
    except OSError:
        print('Unable to run diamond (make sure diamond is available in your PATH).')
        return

    if status != 0:
        print('Failed to run diamond (wrong arguments).')
def curate(inputfile=None, outputfile=None, taxa=None, biomass=None, biomass_db_path=None, normalize_biomass=False):
    """Load the draft universe plus its curation inputs and run ``curate_universe``.

    Args:
        inputfile: draft universe model; its companion data file is expected
            next to it with a ``.csv`` extension. Defaults to the configured
            generated files.
        outputfile: destination of the curated universe; defaults to
            ``universe_<taxa>.xml.gz`` in the configured generated folder.
        taxa: taxonomic group forwarded to ``curate_universe``.
        biomass: biomass identifier; when empty it is ``'gramneg'`` for
            cyanobacteria and otherwise the value of ``taxa``.
        biomass_db_path: biomass library file; defaults to the configured one.
        normalize_biomass: forwarded to ``load_biomass_db`` as ``normalize_weight``.

    Raises:
        IOError: if the draft model or its model-specific data cannot be read.
        RuntimeError: if the biomass identifier is not in the biomass database.
    """
    if not inputfile:
        draft_path = project_dir + config.get('generated', 'bigg_universe')
        specific_data_path = project_dir + config.get('generated', 'model_specific_data')
    else:
        draft_path = inputfile
        specific_data_path = os.path.splitext(inputfile)[0] + '.csv'

    if not biomass:
        if taxa == 'cyanobacteria':
            biomass = 'gramneg'
        else:
            biomass = taxa

    if not outputfile:
        generated_folder = project_dir + config.get('generated', 'folder')
        outputfile = generated_folder + f"universe_{taxa}.xml.gz"

    # Curation inputs shipped with the project.
    bigg_models = pd.read_csv(project_dir + config.get('input', 'bigg_models'), sep='\t')
    manually_curated = pd.read_csv(project_dir + config.get('input', 'manually_curated'), index_col=0, sep='\t')
    unbalanced_table = pd.read_csv(project_dir + config.get('input', 'unbalanced_metabolites'), header=None)
    unbalanced_ids = unbalanced_table[0].tolist()

    try:
        model = load_cbmodel(draft_path, flavor=config.get('sbml', 'default_flavor'))
        specific_data = pd.read_csv(specific_data_path)
    except IOError:
        raise IOError('Universe draft model not found. Please run --build first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = project_dir + config.get('input', 'biomass_library')
    biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

    if biomass not in biomass_db:
        known_ids = ','.join(biomass_db.keys())
        raise RuntimeError('Biomass identifier not in database. Currently in database: ' + known_ids)

    curate_universe(
        model,
        outputfile=outputfile,
        taxa=taxa,
        biomass_eq=biomass_db[biomass],
        model_specific_data=specific_data,
        bigg_models=bigg_models,
        manually_curated=manually_curated,
        unbalanced_metabolites=unbalanced_ids,
    )
def main():
    """Command-line entry point that downloads the universal model data.

    With ``-o/--output`` all generated files are derived from that path (same
    stem, different extensions); otherwise the configured locations of the
    generated files are used.
    """
    cli = argparse.ArgumentParser(description="Generate universal model to use with CarveMe")
    cli.add_argument('-o', '--output', dest='output', help="Output file")
    options = cli.parse_args()

    if options.output:
        stem = os.path.splitext(options.output)[0]
        universe_draft = options.output
        model_specific_data = stem + '.csv'
        bigg_gprs = stem + '_gprs.csv'
        fasta_file = stem + '.faa'
        gene_annotations = stem + '.tsv'
    else:
        universe_draft = project_dir + config.get('generated', 'bigg_universe')
        model_specific_data = project_dir + config.get('generated', 'model_specific_data')
        bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
        fasta_file = project_dir + config.get('generated', 'fasta_file')
        gene_annotations = project_dir + config.get('generated', 'gene_annotations')

    compound_annotation = project_dir + config.get('input', 'mnx_compounds')

    download_universal_model(universe_draft, compound_annotation)
    download_model_specific_data(model_specific_data, bigg_gprs, fasta_file, gene_annotations)
def main(inputfiles, flavor=None, split_pool=False, no_biomass=False, init=None, mediadb=None, ext_comp_id=None, outputfile=None):
    """Merge single-species models into a community model and save it.

    Args:
        inputfiles: paths of the single-species SBML models.
        flavor: SBML flavor for loading/saving; defaults to the configured one.
        split_pool: when true the extracellular compartments are not merged and
            the pool exchange-reaction naming is used for the initial medium.
        no_biomass: when true no community biomass is created.
        init: optional medium identifier applied to the merged model.
        mediadb: media library path; defaults to the configured library.
        ext_comp_id: extracellular compartment id; defaults to ``'C_e'``.
        outputfile: output SBML path; defaults to ``community.xml``.

    Raises:
        IOError: if the media library cannot be loaded.
    """
    flavor = flavor or config.get('sbml', 'default_flavor')

    if not outputfile:
        community_id = 'community'
        outputfile = 'community.xml'
    else:
        community_id = os.path.splitext(os.path.basename(outputfile))[0]

    if ext_comp_id is None:
        ext_comp_id = 'C_e'

    species_models = []
    for path in inputfiles:
        species_models.append(load_cbmodel(path, flavor=flavor))

    community = Community(
        community_id,
        species_models,
        extracellular_compartment_id=ext_comp_id,
        merge_extracellular_compartments=(not split_pool),
        create_biomass=(not no_biomass),
    )
    merged_model = community.generate_merged_model()

    if init:
        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')
        try:
            media_library = load_media_db(mediadb)
        except IOError:
            raise IOError('Failed to load media library:' + mediadb)

        # The inner quotes are part of the format string passed on to Environment.
        exchange_format = "'R_EX_M_{}_e_pool'" if split_pool else "'R_EX_{}_e'"
        environment = Environment.from_compounds(media_library[init], exchange_format=exchange_format)
        environment.apply(merged_model, inplace=True)

    save_cbmodel(merged_model, outputfile, flavor=flavor)
def main():
    """Command-line entry point for merging single-species models.

    Parses the arguments, requires at least two input files, resolves the SBML
    flavor (``--fbc2``, ``--cobra`` or the configured default) and delegates to
    ``maincall``.
    """
    cli = argparse.ArgumentParser(
        description="Merge single species models into a microbial community model")

    cli.add_argument('input', metavar='INPUTFILES', nargs='+',
                     help="SBML input files (single species)")
    cli.add_argument('-o', '--output', dest='output',
                     help="SBML output file (community)")
    cli.add_argument('-i', '--init', dest='init',
                     help="Initialize model with given medium")
    cli.add_argument('--mediadb', help="Media database file")

    flavor_group = cli.add_mutually_exclusive_group()
    flavor_group.add_argument('--cobra', action='store_true',
                              help="SBML input/output in old cobra format")
    flavor_group.add_argument('--fbc2', action='store_true',
                              help="SBML input/output in sbml-fbc2 format")

    options = cli.parse_args()

    if len(options.input) < 2:
        print(options.input)
        cli.error("Please provide two or more single species models as input files.")

    if options.fbc2:
        sbml_flavor = 'fbc2'
    else:
        sbml_flavor = 'cobra' if options.cobra else config.get('sbml', 'default_flavor')

    maincall(
        inputfiles=options.input,
        flavor=sbml_flavor,
        init=options.init,
        mediadb=options.mediadb,
        outputfile=options.output,
    )
def maincall(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False, biomass=None,
             biomass_db_path=None, normalize_biomass=False, taxa=None, outputfile=None):
    """Run one stage of the universe build, selected by ``mode``.

    Modes:
        ``'draft'``: build the BiGG universe draft, download the model-specific
            data and create the GPR table.
        ``'thermo'``: compute Gibbs energies for the draft universe.
        ``'curated'``: load the draft plus all curation inputs and run
            ``curate_universe``.
        Any other value prints ``Unrecognized option`` and does nothing.

    Args:
        mode: stage to run (see above).
        noheuristics, allow_unbalanced, allow_blocked: negated and forwarded to
            ``curate_universe`` as ``use_heuristics``, ``remove_unbalanced``
            and ``remove_blocked``.
        nothermo: when true no thermodynamics/metabolomics data is loaded.
        biomass: biomass identifier; defaults to ``'archaea'`` for archaea and
            to the configured ``universe/default_biomass`` otherwise.
        biomass_db_path: biomass library file; defaults to the configured one.
        normalize_biomass: forwarded to ``load_biomass_db``.
        taxa: taxonomic group of the curated universe.
        outputfile: output path of the selected stage; defaults to the
            configured generated files.

    Raises:
        IOError: if the draft universe or the thermodynamic data is missing.
        RuntimeError: if the biomass identifier is not in the biomass database.
    """
    if mode == 'draft':
        if outputfile:
            stem = os.path.splitext(outputfile)[0]
            draft_path = outputfile
            specific_data_path = stem + '.csv'
            gprs_path = stem + '_gprs.csv'
        else:
            draft_path = project_dir + config.get('generated', 'universe_draft')
            specific_data_path = project_dir + config.get('generated', 'model_specific_data')
            gprs_path = project_dir + config.get('generated', 'bigg_gprs')

        build_bigg_universe_model(draft_path)
        specific_data = download_model_specific_data(specific_data_path)
        # NOTE: the gene-sequence (fasta) download step is disabled, so the
        # table returned by create_gpr_table is not used any further here.
        create_gpr_table(specific_data, outputfile=gprs_path)
        return

    if mode == 'thermo':
        draft_path = project_dir + config.get('generated', 'universe_draft')
        compounds_path = project_dir + config.get('input', 'equilibrator_compounds')
        if outputfile:
            gibbs_path = outputfile
        else:
            gibbs_path = project_dir + config.get('generated', 'bigg_gibbs')
        compute_bigg_gibbs_energy(draft_path, compounds_path, gibbs_path)
        return

    if mode != 'curated':
        print('Unrecognized option:', mode)
        return

    # --- curated universe -------------------------------------------------
    draft_path = project_dir + config.get('generated', 'universe_draft')
    specific_data_path = project_dir + config.get('generated', 'model_specific_data')

    if not biomass:
        biomass = 'archaea' if taxa == 'archaea' else config.get('universe', 'default_biomass')

    if outputfile:
        final_path = outputfile
    else:
        # Bacterial universes are tagged by their biomass equation, others by taxa.
        tag = biomass if taxa == 'bacteria' else taxa
        final_path = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

    bigg_models = pd.read_csv(project_dir + config.get('input', 'bigg_models'), index_col=0)
    manually_curated = pd.read_csv(project_dir + config.get('input', 'manually_curated'), index_col=0)
    unbalanced_table = pd.read_csv(project_dir + config.get('input', 'unbalanced_metabolites'), header=None)
    unbalanced_ids = unbalanced_table[0].tolist()

    try:
        model = load_cbmodel(draft_path, flavor=config.get('sbml', 'default_flavor'))
        specific_data = pd.read_csv(specific_data_path)
    except IOError:
        raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')

    if biomass_db_path is None:
        biomass_db_path = project_dir + config.get('input', 'biomass_library')
    biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

    if biomass not in biomass_db:
        known_ids = ','.join(biomass_db.keys())
        raise RuntimeError('Biomass identifier not in database. Currently in database: ' + known_ids)
    biomass_equation = biomass_db[biomass]

    thermodynamics = None
    metabolomics = None
    if not nothermo:
        try:
            gibbs_path = project_dir + config.get('generated', 'bigg_gibbs')
            thermodynamics = pd.read_csv(gibbs_path, index_col=0)
        except IOError:
            raise IOError('Thermodynamic data not found. Please run --thermo first to generate thermodynamic data.')
        metabolomics_path = project_dir + config.get('input', 'metabolomics')
        metabolomics = pd.read_csv(metabolomics_path, index_col=1)

    curate_universe(
        model,
        taxa=taxa,
        outputfile=final_path,
        model_specific_data=specific_data,
        bigg_models=bigg_models,
        thermodynamics_data=thermodynamics,
        metabolomics_data=metabolomics,
        manually_curated=manually_curated,
        unbalanced_metabolites=unbalanced_ids,
        biomass_eq=biomass_equation,
        use_heuristics=(not noheuristics),
        remove_unbalanced=(not allow_unbalanced),
        remove_blocked=(not allow_blocked),
    )
def maincall(inputfile, media, mediadb=None, universe=None, universe_file=None, outputfile=None, flavor=None,
             spent=None, verbose=False):
    """Gap fill a model for a list of media and save the result as SBML.

    Args:
        inputfile: SBML model to gap fill.
        media: list of medium identifiers from the media database.
        mediadb: media database file; defaults to the configured library.
        universe: name of a pre-built universe (``universe_<name>.xml`` in the
            configured generated folder); ignored when ``universe_file`` is set.
        universe_file: explicit reaction universe file.
        outputfile: output path; defaults to ``<inputfile stem>_gapfill.xml``.
        flavor: SBML flavor of the input model; the configured default flavor
            is used for saving when empty.
        spent: optional SBML model of the species providing spent medium.
        verbose: print progress messages.

    Raises:
        IOError: if a model, the universe or the media database cannot be loaded.
    """
    def report(message):
        # Progress messages are only shown in verbose mode.
        if verbose:
            print(message)

    report('Loading model...')
    try:
        model = load_cbmodel(inputfile, flavor=flavor)
    except IOError:
        raise IOError('Failed to load model:' + inputfile)

    spent_model = None
    if spent:
        report('Loading model for spent medium species...')
        try:
            spent_model = load_cbmodel(spent, flavor=flavor)
        except IOError:
            raise IOError('Failed to load model:' + spent)

    report('Loading reaction universe...')
    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml".format(
                project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='cobra')
    except IOError:
        if not universe:
            raise IOError('Failed to load universe model:' + universe_file)
        raise IOError(
            'Failed to load universe "{0}". Please run build_universe.py --{0}.'
            .format(universe))

    report('Loading media...')
    if not mediadb:
        mediadb = project_dir + config.get('input', 'media_library')
    try:
        media_library = load_media_db(mediadb)
    except IOError:
        raise IOError('Failed to load media database:' + mediadb)

    if verbose:
        mets_before, rxns_before = len(model.metabolites), len(model.reactions)
        print('Gap filling for {}...'.format(', '.join(media)))

    uptake_limit = config.getint('gapfill', 'max_uptake')
    multiGapFill(model, universe_model, media, media_library, max_uptake=uptake_limit, inplace=True,
                 spent_model=spent_model)

    if verbose:
        mets_after, rxns_after = len(model.metabolites), len(model.reactions)
        print('Added {} reactions and {} metabolites'.format((rxns_after - rxns_before), (mets_after - mets_before)))

    report('Saving SBML file...')
    if not outputfile:
        outputfile = os.path.splitext(inputfile)[0] + '_gapfill.xml'
    if not flavor:
        flavor = config.get('sbml', 'default_flavor')
    save_cbmodel(model, outputfile, flavor=flavor)

    report('Done.')
def main():
    """Command-line entry point for gap filling a model on a set of media.

    Parses the arguments, resolves the SBML flavor (``--fbc2``, ``--cobra`` or
    the configured default), splits the comma-separated media list and
    delegates to ``maincall``.
    """
    cli = argparse.ArgumentParser(
        description="GapFill a metabolic model for a given set of media")

    cli.add_argument('input', metavar='INPUTFILE', help="SBML input file")
    cli.add_argument('-m', '--media', dest='media', required=True,
                     help="List of media (comma-separated)")
    cli.add_argument('--mediadb', help="Media database file")
    cli.add_argument('--spent-medium', metavar='SPECIES', dest='spent',
                     help="Add spent medium compounds generated from given species (SBML model).")

    universe_group = cli.add_mutually_exclusive_group()
    universe_group.add_argument('-u', '--universe', dest='universe',
                                help="Pre-built universe model (default: bacteria)")
    universe_group.add_argument('--universe-file', dest='universe_file',
                                help="Reaction universe file (SBML format)")

    cli.add_argument('-o', '--output', dest='output', type=str, help="SBML output file")
    cli.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                     help="Switch to verbose mode")

    flavor_group = cli.add_mutually_exclusive_group()
    flavor_group.add_argument('--cobra', action='store_true', help="Input SBML in old cobra format")
    flavor_group.add_argument('--fbc2', action='store_true', help="Input SBML in sbml-fbc2 format")

    options = cli.parse_args()

    if options.fbc2:
        sbml_flavor = 'fbc2'
    else:
        sbml_flavor = 'cobra' if options.cobra else config.get('sbml', 'default_flavor')

    maincall(
        inputfile=options.input,
        media=options.media.split(','),
        mediadb=options.mediadb,
        universe=options.universe,
        universe_file=options.universe_file,
        outputfile=options.output,
        flavor=sbml_flavor,
        spent=options.spent,
        verbose=options.verbose,
    )
def maincall(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
             ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
             mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None,
             reference=None, ref_score=None, recursive_mode=False):
    """Reconstruct a model (or an ensemble of models) from a genome and save it.

    Args:
        inputfile: input file, or an NCBI accession when ``input_type`` is
            ``'refseq'``.
        input_type: one of ``'protein'``, ``'dna'``, ``'eggnog'``,
            ``'diamond'`` or ``'refseq'``.
        outputfile: output SBML file, or the output folder in recursive mode;
            defaults to the input path with an ``.xml`` extension.
        diamond_args: extra arguments forwarded to ``run_blast``.
        universe: name of a pre-built universe (``universe_<name>.xml.gz``).
        universe_file: explicit universe file; takes precedence over ``universe``.
        ensemble_size: build an ensemble when greater than 1.
        verbose: print progress messages.
        debug: use the model id as prefix for debug output of scoring/carving.
        flavor: SBML flavor used for saving; defaults to the configured one.
        gapfill: comma-separated media identifiers to gap fill for.
        blind_gapfill: gap fill without using the reaction scores.
        init: medium identifier used to initialize the model environment.
        mediadb: media library file; defaults to the configured library.
        default_score, uptake_score, soft_score, ref_score: forwarded to
            ``carve_model``.
        soft, hard: soft/hard constraint files.
        reference: model of a reference species, forwarded as ``ref_model``.
        recursive_mode: derive the model id from the input file and treat
            ``outputfile`` as a folder.

    Returns:
        None. Most run-time failures are reported on stdout and end the run early.

    Raises:
        IOError: if a constraints file, the universe, the reference model or
            the media library cannot be loaded.
        ValueError: if ``input_type`` is not recognized.
    """
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]
        if outputfile:
            outputfile = f'{outputfile}/{model_id}.xml'
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))
    if not os.path.exists(outputfolder):
        try:
            # exist_ok avoids a spurious failure when several workers of a
            # recursive run try to create the same folder at the same time.
            os.makedirs(outputfolder, exist_ok=True)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    if input_type == 'refseq':
        if verbose:
            print(f'Downloading genome {inputfile} from NCBI...')

        ncbi_table = load_ncbi_table(project_dir + config.get('input', 'refseq'))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file decides whether diamond runs in protein or DNA mode.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('generated', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = f"{project_dir}{config.get('generated', 'folder')}universe_{universe}.xml.gz"
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    try:
        universe_model = load_cbmodel(universe_file, flavor='bigg')
        universe_model.id = model_id
    except IOError as err:
        available = '\n'.join(glob(f"{project_dir}{config.get('generated', 'folder')}universe_*.xml.gz"))
        raise IOError(
            f'Failed to load universe model: {universe_file}\nAvailable universe files:\n{available}') from err

    if reference:
        if verbose:
            print('Loading reference model...')
        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loading problem is reported as IOError, but interpreter-level
            # signals (KeyboardInterrupt, SystemExit) are no longer swallowed.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:
        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    if verbose:
        print('Scoring reactions...')

    gene_annotations = pd.read_csv(project_dir + config.get('generated', 'gene_annotations'), sep='\t')
    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Only score reactions that are present in the selected universe.
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores, gene2gene = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            # Best effort: the reconstruction continues without an initial medium.
            print(f'Error: medium {init} not in media database.')

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    if ensemble_size is not None and ensemble_size > 1:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        ensemble = build_ensemble(universe_model, scores, ensemble_size, init_env=init_env)
        annotate_genes(ensemble, gene2gene, gene_annotations)
        save_ensemble(ensemble, outputfile, flavor=flavor)

        # The ensemble is complete at this point; the single-model steps below
        # must not run because no single `model` exists in this mode.
        if verbose:
            print('Done.')
        return

    if verbose:
        print('Reconstructing a single model')

    model = carve_model(universe_model, scores,
                        inplace=(not gapfill),
                        default_score=default_score,
                        uptake_score=uptake_score,
                        soft_score=soft_score,
                        soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints,
                        ref_model=ref_model,
                        ref_score=ref_score,
                        init_env=init_env,
                        debug_output=debug_output)

    # Check for failure before touching the model.
    if model is None:
        print("Failed to build model.")
        return

    annotate_genes(model, gene2gene, gene_annotations)

    if not gapfill:
        save_cbmodel(model, outputfile, flavor=flavor)
    else:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print(f"Gap filling for {', '.join(media)}...")

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print(f'Added {(n2 - n1)} reactions and {(m2 - m1)} metabolites')

        if init_env:
            # Initialize the environment again, as new exchange reactions can
            # be acquired during gap filling.
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')
def main():
    """Command-line entry point for reconstructing models with CarveMe.

    Parses and validates the arguments, makes sure the diamond database exists
    (``first_run_check``) and then calls ``maincall`` once for a single input,
    or once per input file on a worker pool when ``-r`` is given.

    All argument validation happens before any other work, so invalid
    combinations are rejected without building the diamond database first.
    """
    parser = argparse.ArgumentParser(description="Reconstruct a metabolic model using CarveMe",
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('input', metavar='INPUT', nargs='+',
                        help="Input (protein fasta file by default, see other options for details).\n" +
                             "When used with -r an input pattern with wildcards can also be used.\n" +
                             "When used with --refseq an NCBI RefSeq assembly accession is expected."
                        )

    input_type_args = parser.add_mutually_exclusive_group()
    input_type_args.add_argument('--dna', action='store_true', help="Build from DNA fasta file")
    input_type_args.add_argument('--egg', action='store_true', help="Build from eggNOG-mapper output file")
    input_type_args.add_argument('--diamond', action='store_true', help=argparse.SUPPRESS)
    input_type_args.add_argument('--refseq', action='store_true', help="Download genome from NCBI RefSeq and build")

    parser.add_argument('--diamond-args', help="Additional arguments for running diamond")

    parser.add_argument('-r', '--recursive', action='store_true', dest='recursive',
                        help="Bulk reconstruction from folder with genome files")

    parser.add_argument('-o', '--output', dest='output', help="SBML output file (or output folder if -r is used)")

    univ = parser.add_mutually_exclusive_group()
    univ.add_argument('-u', '--universe', dest='universe', help="Pre-built universe model (default: bacteria)")
    univ.add_argument('--universe-file', dest='universe_file', help="Reaction universe file (SBML format)")

    sbml = parser.add_mutually_exclusive_group()
    sbml.add_argument('--cobra', action='store_true', help="Output SBML in old cobra format")
    sbml.add_argument('--fbc2', action='store_true', help="Output SBML in sbml-fbc2 format")

    parser.add_argument('-n', '--ensemble', type=int, dest='ensemble',
                        help="Build model ensemble with N models")

    parser.add_argument('-g', '--gapfill', dest='gapfill',
                        help="Gap fill model for given media")

    parser.add_argument('-i', '--init', dest='init',
                        help="Initialize model with given medium")

    parser.add_argument('--mediadb', help="Media database file")

    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="Switch to verbose mode")
    parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                        help="Debug mode: writes intermediate results into output files")

    parser.add_argument('--soft', help="Soft constraints file")
    parser.add_argument('--hard', help="Hard constraints file")
    parser.add_argument('--reference', help="Manually curated model of a close reference species.")

    parser.add_argument('--default-score', type=float, default=-1.0, help=argparse.SUPPRESS)
    parser.add_argument('--uptake-score', type=float, default=0.0, help=argparse.SUPPRESS)
    parser.add_argument('--soft-score', type=float, default=1.0, help=argparse.SUPPRESS)
    parser.add_argument('--reference-score', type=float, default=0.0, help=argparse.SUPPRESS)

    parser.add_argument('--blind-gapfill', action='store_true', help=argparse.SUPPRESS)

    args = parser.parse_args()

    # --- validation (parser.error exits with status 2) ----------------------
    if args.gapfill and args.ensemble:
        parser.error('Gap fill and ensemble generation cannot currently be combined (not implemented yet).')

    if (args.soft or args.hard) and args.ensemble:
        parser.error('Soft/hard constraints and ensemble generation cannot currently be combined (not implemented yet).')

    # The media library is read for gap filling as well as for --init, so the
    # option is valid with either of them.
    if args.mediadb and not (args.gapfill or args.init):
        parser.error('--mediadb can only be used with --gapfill or --init')

    if args.recursive and args.refseq:
        parser.error('-r cannot be combined with --refseq')

    if not args.recursive and len(args.input) > 1:
        parser.error('Use -r when specifying more than one input file')

    if args.egg:
        input_type = 'eggnog'
    elif args.dna:
        input_type = 'dna'
    elif args.diamond:
        input_type = 'diamond'
    elif args.refseq:
        input_type = 'refseq'
    else:
        input_type = 'protein'

    if args.fbc2:
        flavor = 'fbc2'
    elif args.cobra:
        flavor = 'cobra'
    else:
        flavor = config.get('sbml', 'default_flavor')

    first_run_check()

    # Options shared by the single-input and the recursive invocation.
    common = dict(
        input_type=input_type,
        outputfile=args.output,
        diamond_args=args.diamond_args,
        universe=args.universe,
        universe_file=args.universe_file,
        ensemble_size=args.ensemble,
        verbose=args.verbose,
        flavor=flavor,
        gapfill=args.gapfill,
        blind_gapfill=args.blind_gapfill,  # honour the --blind-gapfill switch
        init=args.init,
        mediadb=args.mediadb,
        default_score=args.default_score,
        uptake_score=args.uptake_score,
        soft_score=args.soft_score,
        soft=args.soft,
        hard=args.hard,
        reference=args.reference,
        ref_score=args.reference_score,
    )

    if not args.recursive:
        maincall(inputfile=args.input[0], debug=args.debug, **common)
    else:
        from functools import partial

        # A partial of the module-level function can be pickled and sent to the
        # worker processes, unlike a function defined inside main().
        # Debug output is only forwarded for single-input runs.
        worker = partial(maincall, recursive_mode=True, **common)

        p = Pool()
        try:
            p.map(worker, args.input)
        finally:
            p.close()
            p.join()
def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
         ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
         mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None,
         reference=None, ref_score=None, recursive_mode=False, specified_solver=None, feas_tol=None, opt_tol=None,
         int_feas_tol=None):
    """Reconstruct a model (or an ensemble) from a genome, with solver options.

    Args:
        inputfile: input file, or an NCBI accession when ``input_type`` is
            ``'refseq'`` or ``'genbank'``.
        input_type: one of ``'protein'``, ``'dna'``, ``'eggnog'``,
            ``'diamond'``, ``'refseq'`` or ``'genbank'``.
        outputfile: output SBML file, or the output folder in recursive mode;
            defaults to the input path with an ``.xml`` extension.
        diamond_args: extra arguments forwarded to ``run_blast``.
        universe: name of a pre-built universe (``universe_<name>.xml.gz``).
        universe_file: explicit universe file; takes precedence over ``universe``.
        ensemble_size: build an ensemble when greater than 1.
        verbose: print progress messages.
        debug: use the model id as prefix for debug output of scoring/carving.
        flavor: SBML flavor used for saving; defaults to the configured one.
        gapfill: comma-separated media identifiers to gap fill for.
        blind_gapfill: gap fill without using the reaction scores.
        init: medium identifier used to initialize the model environment.
        mediadb: media library file; defaults to the configured library.
        default_score, uptake_score, soft_score, ref_score: forwarded to
            ``carve_model``.
        soft, hard: soft/hard constraint files.
        reference: model of a reference species, forwarded as ``ref_model``.
        recursive_mode: derive the model id from the input file and treat
            ``outputfile`` as a folder.
        specified_solver: solver to select when it differs from the configured
            ``solver/default_solver``.
        feas_tol, opt_tol, int_feas_tol: solver tolerances; each one is set as
            a default parameter when it is not ``None``.

    Returns:
        None. Most run-time failures are reported on stdout and end the run early.

    Raises:
        IOError: if a constraints file, the universe, the reference model or
            the media library cannot be loaded.
        ValueError: if ``input_type`` is not recognized.
    """
    if recursive_mode:
        model_id = os.path.splitext(os.path.basename(inputfile))[0]
        if outputfile:
            outputfile = '{}/{}.xml'.format(outputfile, model_id)
        else:
            outputfile = os.path.splitext(inputfile)[0] + '.xml'
    else:
        if outputfile:
            model_id = os.path.splitext(os.path.basename(outputfile))[0]
        else:
            model_id = os.path.splitext(os.path.basename(inputfile))[0]
            outputfile = os.path.splitext(inputfile)[0] + '.xml'

    model_id = build_model_id(model_id)

    outputfolder = os.path.abspath(os.path.dirname(outputfile))
    if not os.path.exists(outputfolder):
        try:
            # exist_ok avoids a spurious failure when concurrent runs create
            # the same folder at the same time.
            os.makedirs(outputfolder, exist_ok=True)
        except OSError:
            print('Unable to create output folder:', outputfolder)
            return

    if soft:
        try:
            soft_constraints = load_soft_constraints(soft)
        except IOError as err:
            raise IOError('Failed to load soft-constraints file:' + soft) from err
    else:
        soft_constraints = None

    if hard:
        try:
            hard_constraints = load_hard_constraints(hard)
        except IOError as err:
            raise IOError('Failed to load hard-constraints file:' + hard) from err
    else:
        hard_constraints = None

    if input_type == 'refseq' or input_type == 'genbank':
        if verbose:
            print('Downloading genome {} from NCBI...'.format(inputfile))

        ncbi_table = load_ncbi_table(project_dir + config.get('ncbi', input_type))
        inputfile = download_ncbi_genome(inputfile, ncbi_table)

        if not inputfile:
            print('Failed to download genome from NCBI.')
            return

        # The downloaded file decides whether diamond runs in protein or DNA mode.
        input_type = 'protein' if inputfile.endswith('.faa.gz') else 'dna'

    if input_type == 'protein' or input_type == 'dna':
        if verbose:
            print('Running diamond...')
        diamond_db = project_dir + config.get('input', 'diamond_db')
        blast_output = os.path.splitext(inputfile)[0] + '.tsv'
        exit_code = run_blast(inputfile, input_type, blast_output, diamond_db, diamond_args, verbose)

        if exit_code is None:
            print('Unable to run diamond (make sure diamond is available in your PATH).')
            return

        if exit_code != 0:
            print('Failed to run diamond.')
            if diamond_args is not None:
                print('Incorrect diamond args? Please check documentation or use default args.')
            return

        annotations = load_diamond_results(blast_output)
    elif input_type == 'eggnog':
        annotations = load_eggnog_data(inputfile)
    elif input_type == 'diamond':
        annotations = load_diamond_results(inputfile)
    else:
        raise ValueError('Invalid input type: ' + input_type)

    if verbose:
        print('Loading universe model...')

    if not universe_file:
        if universe:
            universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
        else:
            universe_file = project_dir + config.get('generated', 'default_universe')

    # Change the default solver if a different solver is specified in the input.
    if specified_solver is not None:
        if specified_solver != config.get('solver', 'default_solver'):
            set_default_solver(specified_solver)

    # Tolerances are applied independently of the solver selection.
    params_to_set = {'FEASIBILITY_TOL': feas_tol,
                     'OPTIMALITY_TOL': opt_tol,
                     'INT_FEASIBILITY_TOL': int_feas_tol}
    for key, value in params_to_set.items():
        if value is not None:
            set_default_parameter(getattr(Parameter, key), value)

    try:
        universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
        universe_model.id = model_id
    except IOError as err:
        available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
        raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(
            universe_file, available)) from err

    if reference:
        if verbose:
            print('Loading reference model...')
        try:
            ref_model = load_cbmodel(reference)
        except Exception as err:
            # Any loading problem is reported as IOError, but interpreter-level
            # signals (KeyboardInterrupt, SystemExit) are no longer swallowed.
            raise IOError('Failed to load reference model.') from err
    else:
        ref_model = None

    if gapfill or init:
        if verbose:
            print('Loading media library...')

        if not mediadb:
            mediadb = project_dir + config.get('input', 'media_library')

        try:
            media_db = load_media_db(mediadb)
        except IOError as err:
            raise IOError('Failed to load media library:' + mediadb) from err

    if verbose:
        print('Scoring reactions...')

    bigg_gprs = project_dir + config.get('generated', 'bigg_gprs')
    gprs = pd.read_csv(bigg_gprs)
    # Only score reactions that are present in the selected universe.
    gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

    debug_output = model_id if debug else None
    scores = reaction_scoring(annotations, gprs, debug_output=debug_output)

    if scores is None:
        print('The input genome did not match sufficient genes/reactions in the database.')
        return

    if not flavor:
        flavor = config.get('sbml', 'default_flavor')

    init_env = None

    if init:
        if init in media_db:
            init_env = Environment.from_compounds(media_db[init])
        else:
            # Best effort: the reconstruction continues without an initial medium.
            print('Error: medium {} not in media database.'.format(init))

    universe_model.metadata['Description'] = 'This model was built with CarveMe version ' + version

    # Only the gap-filling path keeps a model object in memory; bind the name
    # up front so the check further down is valid for every path.
    model = None

    if ensemble_size is None or ensemble_size <= 1:
        if verbose:
            print('Reconstructing a single model')

        if not gapfill:
            # Without gap filling carve_model is given the output file directly.
            carve_model(universe_model, scores,
                        outputfile=outputfile,
                        flavor=flavor,
                        default_score=default_score,
                        uptake_score=uptake_score,
                        soft_score=soft_score,
                        soft_constraints=soft_constraints,
                        hard_constraints=hard_constraints,
                        ref_model=ref_model,
                        ref_score=ref_score,
                        init_env=init_env,
                        debug_output=debug_output)
        else:
            model = carve_model(universe_model, scores,
                                inplace=False,
                                default_score=default_score,
                                uptake_score=uptake_score,
                                soft_score=soft_score,
                                soft_constraints=soft_constraints,
                                hard_constraints=hard_constraints,
                                ref_model=ref_model,
                                ref_score=ref_score,
                                init_env=init_env,
                                debug_output=debug_output)
    else:
        if verbose:
            print('Building an ensemble of', ensemble_size, 'models')

        build_ensemble(universe_model, scores, ensemble_size, outputfile, flavor, init_env=init_env)

    if gapfill and model is not None:
        media = gapfill.split(',')

        if verbose:
            m1, n1 = len(model.metabolites), len(model.reactions)
            print('Gap filling for {}...'.format(', '.join(media)))

        max_uptake = config.getint('gapfill', 'max_uptake')

        if blind_gapfill:
            scores = None
        else:
            scores = dict(scores[['reaction', 'normalized_score']].values)
        multiGapFill(model, universe_model, media, media_db, scores=scores, max_uptake=max_uptake, inplace=True)

        if verbose:
            m2, n2 = len(model.metabolites), len(model.reactions)
            print('Added {} reactions and {} metabolites'.format((n2 - n1), (m2 - m1)))

        if init_env:
            # Initialize the environment again, as new exchange reactions can
            # be acquired during gap filling.
            init_env.apply(model, inplace=True, warning=False)

        save_cbmodel(model, outputfile, flavor=flavor)

    if verbose:
        print('Done.')