def ubiquitize_model(in_sbml, chebi, groups_sbml, ub_s_ids=None, ub_chebi_ids=None):
    """
    Infers and marks ubiquitous species in the model.

    :param in_sbml: str, path to the input SBML file
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param groups_sbml: str, path to the output SBML file (with groups extension)
    :param ub_s_ids: optional, ids of ubiquitous species (will be inferred if set to None)
    :param ub_chebi_ids: optional, ids of ubiquitous ChEBI terms (will be inferred if set to None)
    :return: tuple (s_id2chebi_id, ub_s_ids): dict {species_id: ChEBI_term_id},
        collection of ubiquitous species_ids.
    """
    doc = libsbml.SBMLReader().readSBML(in_sbml)
    model = doc.getModel()

    # Attach ChEBI annotations to the species, then build the species -> term mapping.
    annotate_metabolites(model, chebi)
    logging.info("mapping species to ChEBI")
    s_id2chebi_id = get_species_id2chebi_id(model)

    # Infer (or validate) the ubiquitous species; ChEBI-level ids are not needed here.
    _, ub_s_ids = get_ub_elements(model, chebi, s_id2chebi_id, ub_chebi_ids, ub_s_ids)

    # No generalization is performed: empty reaction/species clustering maps,
    # and no plain generalized SBML output (out_sbml=None).
    save_as_comp_generalized_sbml(model, None, groups_sbml, {}, {}, ub_s_ids, chebi)
    return s_id2chebi_id, ub_s_ids
def generalize_model(in_sbml, chebi, groups_sbml, out_sbml, ub_s_ids=None, ub_chebi_ids=None, ignore_biomass=True):
    """
    Generalizes a model.

    :param in_sbml: str, path to the input SBML file
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param groups_sbml: str, path to the output SBML file (with groups extension)
    :param out_sbml: str, path to the output SBML file (generalized)
    :param ub_s_ids: optional, ids of ubiquitous species (will be inferred if set to None)
    :param ub_chebi_ids: optional, ids of ubiquitous ChEBI terms (will be inferred if set to None)
    :param ignore_biomass: boolean, whether to ignore the biomass reaction
        (and its stoichiometry preserving constraint)
    :return: tuple (r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids):
        dict {reaction_id: reaction_group_id}, dict {species_id: species_group_id},
        dict {species_id: ChEBI_term_id}, collection of ubiquitous species_ids.
    """
    doc = libsbml.SBMLReader().readSBML(in_sbml)
    model = doc.getModel()
    biomass_r_ids = get_biomass_r_ids(model) if ignore_biomass else None

    # Pre-processing: drop trivial reactions/unused elements, annotate species with ChEBI.
    remove_is_a_reactions(model)
    annotate_metabolites(model, chebi)
    # TODO: fix comp separation
    # separate_boundary_metabolites(model)
    remove_unused_elements(model)

    logging.info("mapping species to ChEBI")
    s_id2chebi_id = get_species_id2chebi_id(model)
    ub_chebi_ids, ub_s_ids = get_ub_elements(model, chebi, s_id2chebi_id, ub_chebi_ids, ub_s_ids)

    # Restrict the ontology to the terms actually present in the model
    # (plus equivalence relationships) before generalizing.
    used_terms = (term for term in (chebi.get_term(t_id) for t_id in s_id2chebi_id.values()) if term)
    size_before = len(chebi)
    filter_ontology(chebi, used_terms, relationships=EQUIVALENT_RELATIONSHIPS, min_deepness=3)
    logging.info('Filtered the ontology from %d terms to %d' % (size_before, len(chebi)))

    # Ubiquity threshold scales with model size but is clamped to [3, UBIQUITOUS_THRESHOLD].
    threshold = min(max(3, int(0.1 * model.getNumReactions())), UBIQUITOUS_THRESHOLD)
    s_id2clu, ub_s_ids = generalize_species(model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold,
                                            r_ids_to_ignore=biomass_r_ids)
    logging.info("generalized species")
    r_id2clu = generalize_reactions(model, s_id2clu, s_id2chebi_id, ub_chebi_ids,
                                    r_ids_to_ignore=biomass_r_ids)
    logging.info("generalized reactions")

    # Invert the species clustering: (compartment_id, term) -> set of species ids.
    clu2s_ids = {(c_id, term): s_ids
                 for ((c_id, (term, )), s_ids) in invert_map(s_id2clu).items()}
    r_id2g_eq, s_id2gr_id = save_as_comp_generalized_sbml(model, out_sbml, groups_sbml,
                                                          r_id2clu, clu2s_ids, ub_s_ids, chebi)
    return r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids
def multimodel_pipeline(sbml2parameters, res_dir, treeefm_path, max_efm_number=1000, rewrite=True, org=None):
    """
    Runs the full EFM analysis pipeline over several models and serializes a combined report.

    For each input model: constrains exchange reactions, annotates it, performs EFM analysis,
    and serializes per-model results. If several models are given, they are merged and
    compared; finally EFM communities are detected and the whole report is serialized.

    :param sbml2parameters: dict {sbml_path: (r_id2rev, r_id2rev_banned)} describing, per model,
        the forced and prohibited exchange-reaction directions
    :param res_dir: str, path to the directory where the results are stored
    :param treeefm_path: str, path to the TreeEFM executable
    :param max_efm_number: int, maximal number of EFMs to compute
    :param rewrite: boolean, whether to rewrite existing result directories
    :param org: optional, organism identifier passed to the annotation step
    :return: None (results are written to res_dir)
    """
    create_dirs(res_dir, rewrite)
    # Paths in the serialized HTML are relative to res_dir.
    get_f_path = lambda f: os.path.join('..', os.path.relpath(f, res_dir)) if f else None
    tab2html = {}
    model_id2sbml, model_id2S, model_id2efm_id2pws = {}, {}, {}
    name2pw = get_name2pw()
    pts = parse_simple(get_pts())
    root_ids = {t.get_id() for t in pts.get_roots()}
    chebi = parse(CHEBI)
    ub_ch_ids = get_ubiquitous_chebi_ids(add_common=True, add_cofactors=True, chebi=chebi)
    efm_id2pws = {}
    model_id2cofactors = {}
    model_id2m_id2chebi_id = {}

    for sbml, (r_id2rev, r_id2rev_banned) in sbml2parameters.items():
        doc = libsbml.SBMLReader().readSBML(sbml)
        model = doc.getModel()
        model_name = get_model_name(model=model)
        # Shorten long model names for tab titles; derive a filesystem-safe id from them.
        short_model_name = model_name
        if len(model_name) > 12:
            short_model_name = model_name[:10].strip('-_ ')
            if len(short_model_name) == 10:
                short_model_name += '...'
        safe_m_name = ''.join(ch for ch in short_model_name.replace(' ', '_')
                              if ch.isalnum() or '_' == ch)
        logging.info('Analysing %s...' % model_name)

        # create directories to store results
        logging.info("Preparing directories...")
        m_dir = os.path.join(res_dir, safe_m_name)
        create_dirs(m_dir, rewrite)

        # exchange_rs = get_exchange_reactions(model)
        # csv = '%s/%s.exchanges.csv' % (m_dir, safe_m_name)
        # df2csv(reactions2df(model, exchange_rs), csv)

        cofactors = select_metabolite_ids_by_term_ids(model, ub_ch_ids)
        if r_id2rev:
            # Cofactor constraints are only applied when nothing is explicitly banned.
            constraint_exchange_reactions(model, forsed_r_id2rev=r_id2rev,
                                          prohibited_r_id2rev=r_id2rev_banned,
                                          cofactors=cofactors if not r_id2rev_banned else None)
        logging.info("Annotating the model...")
        annotate(model, org=org, reactions=False, pathways=False, chebi=chebi)
        m_id2ch_id = get_species_id2chebi_id(model)

        # copy our model in the result directory
        sbml = os.path.join(m_dir, '%s.constrained.xml' % safe_m_name)
        libsbml.SBMLWriter().writeSBMLToFile(doc, sbml)
        description = model_serializer.serialize(sbml, model, model_name, r_id2rev, m_dir, get_f_path)

        pw2rs = get_pathways(model, pts, name2pw, root_ids)

        logging.info("Performing EFMA...")
        efma_dir = os.path.join(m_dir, 'efma')
        create_dirs(efma_dir, rewrite)
        S, efm_id2pws = analyse_model_efm(model, efma_dir, r_id2rev, tree_efm_path=treeefm_path,
                                          max_efm_number=max_efm_number, rewrite=rewrite, pw2rs=pw2rs)
        for serializer in (efm_serializer.serialize, coupled_reaction_group_serializer.serialize):
            description += \
                serializer(model=model, path=efma_dir, get_f_path=get_f_path, S=S, model_name=model_name)

        # Guard S before dereferencing: the original checked `not S` a few lines below,
        # implying S may be falsy, yet accessed S.gr_id2r_id2c first.
        if S and S.gr_id2r_id2c:
            sbml = os.path.join(efma_dir, '%s.folded.xml' % safe_m_name)
            create_folded_model(S, model)
            libsbml.SBMLWriter().writeSBMLToFile(doc, sbml)
        if not S or not S.efm_id2i:
            description += describe('nothing_found.html')

        model_id2sbml[safe_m_name] = sbml
        model_id2S[safe_m_name] = S
        model_id2efm_id2pws[safe_m_name] = efm_id2pws
        model_id2cofactors[safe_m_name] = cofactors
        model_id2m_id2chebi_id[safe_m_name] = m_id2ch_id
        tab2html['Analysis of %s' % short_model_name] = description, None

    cofactors = set()
    m_id2ch_id = {}
    if len(model_id2sbml) > 1:
        # Merge all models into one and translate per-model ids into merged-model ids.
        mm_dir = os.path.join(res_dir, 'merged_model')
        create_dirs(mm_dir)
        sbml, S, model_id2id2id, common_ids, model_id2dfs, mappings = \
            combine_models(model_id2sbml, model_id2S, mm_dir)
        for model_id in model_id2sbml.keys():
            efm_id2pws.update({model_id2id2id[model_id][efm_id]: pws
                               for (efm_id, pws) in model_id2efm_id2pws[model_id].items()
                               if efm_id in model_id2id2id[model_id]})
            cofactors |= {model_id2id2id[model_id][m_id]
                          for m_id in model_id2cofactors[model_id]
                          if m_id in model_id2id2id[model_id]}
            m_id2ch_id.update({model_id2id2id[model_id][m_id]: ch_id
                               for (m_id, ch_id) in model_id2m_id2chebi_id[model_id].items()
                               if m_id in model_id2id2id[model_id]})
        tab2html['Model comparison'] = \
            mapping_serializer.serialize(model_id2dfs, *mappings, mm_dir, get_f_path), None
        title = 'Combined model analysis'
    else:
        # BUG FIX: dict.items() returns a view, not an iterator, so the original
        # next(model_id2sbml.items()) raised TypeError; wrap it in iter() first.
        model_id, sbml = next(iter(model_id2sbml.items()))
        efm_id2pws = model_id2efm_id2pws[model_id]
        cofactors = model_id2cofactors[model_id]
        m_id2ch_id = model_id2m_id2chebi_id[model_id]
        S = model_id2S[model_id].get_main_S()
        # NOTE(review): placed in the else branch — if run unconditionally it would
        # dead-store over the 'Combined model analysis' title set above; confirm.
        info, title, id2color = '', 'Model analysis', None

    # Communities
    logging.info("Analysing communities...")
    comm_dir = os.path.join(res_dir, 'communities')
    create_dirs(comm_dir, rewrite)
    # id2cluster = detect_communities_by_inputs_of_type(S, 'AMINO ACID', m_id2ch_id, chebi)
    id2cluster = detect_communities_by_boundary_metabolites(S, cofactors=cofactors, threshold=50)
    if id2cluster:
        doc = libsbml.SBMLReader().readSBML(sbml)
        model = doc.getModel()
        description = \
            community_serializer.serialize(model, S, id2cluster, comm_dir, get_f_path, m_id2ch_id, chebi)
        if len(model_id2sbml) > 1:
            tab2html['Model comparison'] = tab2html['Model comparison'][0] + description, None
        else:
            tab2html['EFM communities'] = description, None

    serialize(res_dir, tab2html, title)
# NOTE(review): this is a token-for-token duplicate of the generalize_model defined
# earlier in this file; being defined later, this copy is the one in effect at
# import time. Confirm intent and remove one of the two definitions.
def generalize_model(in_sbml, chebi, groups_sbml, out_sbml, ub_s_ids=None, ub_chebi_ids=None, ignore_biomass=True):
    """
    Generalizes a model.

    :param in_sbml: str, path to the input SBML file
    :param chebi: mod_sbml.onto.obo_ontology.Ontology ChEBI ontology
    :param groups_sbml: str, path to the output SBML file (with groups extension)
    :param out_sbml: str, path to the output SBML file (generalized)
    :param ub_s_ids: optional, ids of ubiquitous species (will be inferred if set to None)
    :param ub_chebi_ids: optional, ids of ubiquitous ChEBI terms (will be inferred if set to None)
    :param ignore_biomass: boolean, whether to ignore the biomass reaction
        (and its stoichiometry preserving constraint)
    :return: tuple (r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids):
        dict {reaction_id: reaction_group_id}, dict {species_id: species_group_id},
        dict {species_id: ChEBI_term_id}, collection of ubiquitous species_ids.
    """
    # input_model
    input_doc = libsbml.SBMLReader().readSBML(in_sbml)
    input_model = input_doc.getModel()
    # Biomass reactions are excluded from generalization when ignore_biomass is set.
    r_ids_to_ignore = get_biomass_r_ids(
        input_model) if ignore_biomass else None
    remove_is_a_reactions(input_model)
    annotate_metabolites(input_model, chebi)
    # TODO: fix comp separation
    # separate_boundary_metabolites(input_model)
    remove_unused_elements(input_model)
    logging.info("mapping species to ChEBI")
    s_id2chebi_id = get_species_id2chebi_id(input_model)
    ub_chebi_ids, ub_s_ids = get_ub_elements(input_model, chebi, s_id2chebi_id, ub_chebi_ids, ub_s_ids)
    # Restrict the ontology to the terms actually used by the model before generalizing.
    terms = (t for t in (chebi.get_term(t_id) for t_id in s_id2chebi_id.values()) if t)
    old_onto_len = len(chebi)
    filter_ontology(chebi, terms, relationships=EQUIVALENT_RELATIONSHIPS, min_deepness=3)
    logging.info('Filtered the ontology from %d terms to %d' % (old_onto_len, len(chebi)))
    # Ubiquity threshold scales with model size, clamped to [3, UBIQUITOUS_THRESHOLD].
    threshold = min(max(3, int(0.1 * input_model.getNumReactions())), UBIQUITOUS_THRESHOLD)
    s_id2clu, ub_s_ids = generalize_species(input_model, s_id2chebi_id, ub_s_ids, chebi, ub_chebi_ids, threshold,
                                            r_ids_to_ignore=r_ids_to_ignore)
    logging.info("generalized species")
    r_id2clu = generalize_reactions(input_model, s_id2clu, s_id2chebi_id, ub_chebi_ids,
                                    r_ids_to_ignore=r_ids_to_ignore)
    logging.info("generalized reactions")
    # Invert the species clustering: (compartment_id, term) -> set of species ids.
    clu2s_ids = {(c_id, term): s_ids for ((c_id, (term, )), s_ids) in invert_map(s_id2clu).items()}
    r_id2g_eq, s_id2gr_id = save_as_comp_generalized_sbml(
        input_model, out_sbml, groups_sbml, r_id2clu, clu2s_ids, ub_s_ids, chebi)
    return r_id2g_eq, s_id2gr_id, s_id2chebi_id, ub_s_ids