def extract_data_sbml(sbml_filepath):
    """Collect the genes, compound ids and reaction ids found in a SBML file.

    Args:
        sbml_filepath (str): path to the SBML file to read

    Returns:
        tuple: ``(genes, id_compounds, id_reactions)``. Genes come from the
        GENE_ASSOCIATION notes of the reactions (each gene once, in
        first-seen order); both id lists hold the decoded ids.
    """
    sbml_reader = libsbml.SBMLReader()
    sbml_document = sbml_reader.readSBML(sbml_filepath)
    sbml_model = sbml_document.getModel()
    species_list = sbml_model.getListOfSpecies()
    reaction_list = sbml_model.getListOfReactions()

    genes = []
    for sbml_reaction in reaction_list:
        reaction_notes = sbmlPlugin.parseNotes(sbml_reaction)
        if "GENE_ASSOCIATION" not in reaction_notes:
            continue
        # sbmlPlugin splits the association rule into the individual genes.
        association = reaction_notes["GENE_ASSOCIATION"][0]
        for gene in sbmlPlugin.parseGeneAssoc(association):
            if gene not in genes:
                genes.append(gene)

    id_compounds = []
    for species in species_list:
        id_compounds.append(sbmlPlugin.convert_from_coded_id(species.id)[0])

    id_reactions = []
    for sbml_reaction in reaction_list:
        id_reactions.append(
            sbmlPlugin.convert_from_coded_id(sbml_reaction.id)[0])

    return genes, id_compounds, id_reactions
def reduce_network(padmet_file: str, empty_padmet: str, reaction_list: list,
                   sbml_output: str, del_cof: bool = False):
    """Build a SBML file restricted to a chosen set of reactions.

    The requested reactions are copied from the reference padmet into the
    target padmet, which is then exported as a level 3 SBML file.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): ids of the reactions to retrieve (each one is
            decoded with convert_from_coded_id before the copy)
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, relations of the copied reactions whose
            ``id_out`` is one of the decoded COFACTORS are deleted before
            the export
    """
    reference = PadmetRef(padmet_file)
    reduced = PadmetSpec(empty_padmet)

    decoded_rxn_ids = []
    for coded_id in reaction_list:
        decoded_rxn_ids.append(convert_from_coded_id(coded_id)[0])
    for decoded_id in decoded_rxn_ids:
        reduced.copyNode(reference, decoded_id)

    cofactor_ids = [convert_from_coded_id(coded)[0] for coded in COFACTORS]
    if del_cof:
        for decoded_id in decoded_rxn_ids:
            # Collect first, delete afterwards: the relation container is
            # not modified while it is being read.
            to_delete = []
            for relation in reduced.dicOfRelationIn[decoded_id]:
                if relation.id_out in cofactor_ids:
                    to_delete.append(relation)
            for relation in to_delete:
                reduced._delRelation(relation)

    padmet_to_sbml(reduced, sbml_output, sbml_lvl=3, verbose=True)
def test_compare_sbml():
    """Check compare_multiple_sbml on two networks each missing one reaction.

    Two SBML files are generated from the same PGDB, each with a different
    reaction removed, then compared. The reactions and metabolites tables
    written in the output folder must contain the expected ids for each
    network. Generated files are removed even when a step or an assertion
    fails, so a failing run does not leave artefacts behind.
    """
    sbml_1 = 'fabo_1.sbml'
    sbml_2 = 'fabo_2.sbml'
    output_dir = 'output_folder'
    try:
        fabo_1_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_1_padmetSpec.delNode('ACYLCOASYN-RXN')
        padmet_to_sbml(fabo_1_padmetSpec, sbml_1)

        fabo_2_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_2_padmetSpec.delNode('ACYLCOADEHYDROG-RXN')
        padmet_to_sbml(fabo_2_padmetSpec, sbml_2)

        compare_multiple_sbml('fabo_1.sbml,fabo_2.sbml', output_dir)

        # Column 1 / 2 of each table flags presence in network 1 / 2.
        reactions_fabo_1 = []
        reactions_fabo_2 = []
        with open('output_folder/reactions.tsv', 'r') as reactions_file:
            csvreader = csv.reader(reactions_file, delimiter='\t')
            for row in csvreader:
                if row[1] == '1':
                    reactions_fabo_1.append(row[0])
                if row[2] == '1':
                    reactions_fabo_2.append(row[0])

        expected_fabo_1_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOASYN-RXN'
        ]
        expected_fabo_2_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOADEHYDROG-RXN'
        ]
        assert set(expected_fabo_1_rxns).issubset(set(reactions_fabo_1))
        assert set(expected_fabo_2_rxns).issubset(set(reactions_fabo_2))

        metabolites_fabo_1 = set()
        metabolites_fabo_2 = set()
        with open('output_folder/metabolites.tsv', 'r') as metabolites_file:
            csvreader = csv.reader(metabolites_file, delimiter='\t')
            for row in csvreader:
                if row[1] == '1':
                    metabolites_fabo_1.add(
                        sbmlPlugin.convert_from_coded_id(row[0])[0])
                if row[2] == '1':
                    metabolites_fabo_2.add(
                        sbmlPlugin.convert_from_coded_id(row[0])[0])

        assert set(FABO_CPDS).issubset(metabolites_fabo_1 | metabolites_fabo_2)
    finally:
        # Only remove what was actually created, so a cleanup error never
        # hides the original failure.
        for sbml_path in (sbml_1, sbml_2):
            if os.path.exists(sbml_path):
                os.remove(sbml_path)
        shutil.rmtree(output_dir, ignore_errors=True)
def parse_compounds_sbml(sbml_file, hide_metabolites):
    """
    Parse a sbml file and turn its reactions into compound-to-compound
    edges for igraph.

    Every (reactant, product) pair of a reaction whose two compounds are
    not hidden gives one edge; reversible reactions also give the opposite
    edge.

    Parameters
    ----------
    sbml_file: str
        pathname of the sbml file
    hide_metabolites: list
        list of metabolites to hide

    Returns
    -------
    edges: list
        (source index, target index) tuples, one per edge
    edges_label: list
        for each edge the id of the reaction
    weights: list
        the weight associated to each edge (always 1)
    nodes: dict
        decoded compound id -> node index
    nodes_label: list
        for each node index the decoded compound id
    """
    sbml_model = read_sbml_model(sbml_file)

    edges, edges_label, weights = [], [], []
    nodes = {}
    nodes_label = []

    def node_index(compound_id):
        # Register the compound on first sight and return its index.
        if compound_id not in nodes:
            nodes[compound_id] = len(nodes_label)
            nodes_label.append(compound_id)
        return nodes[compound_id]

    for reaction in sbml_model.reactions:
        for reactant_met in reaction.reactants:
            reactant_id = convert_from_coded_id(reactant_met.id)[0]
            if reactant_id in hide_metabolites:
                continue
            for product_met in reaction.products:
                product_id = convert_from_coded_id(product_met.id)[0]
                if product_id in hide_metabolites:
                    continue
                source = node_index(reactant_id)
                target = node_index(product_id)
                edges.append((source, target))
                weights.append(1)
                edges_label.append(reaction.id)
                if reaction.reversibility:
                    edges.append((target, source))
                    weights.append(1)
                    edges_label.append(reaction.id)

    return edges, edges_label, weights, nodes, nodes_label
def run_analysis(inp_file, json_onto_file, out_file, encoded):
    """Write the ontology families of a list of compounds to a file.

    Args:
        inp_file (str): path to a text file listing one compound per line
        json_onto_file (str): json ontology file mapping a compound to its
            list of families
        out_file (str): path to the tab-separated file to write, one line
            per compound: compound, then its families joined by commas
        encoded (bool): True when the compounds are written as SBML coded
            ids, in which case they are decoded before the lookup

    Compounds absent from the ontology are reported in the "Others" family.
    """
    with open(json_onto_file, "r") as onto_handle:
        family_dict = json.load(onto_handle)

    # Get the list of compounds to identify. Blank lines are not compounds:
    # keeping them would emit a row with an empty compound name.
    with open(inp_file, "r") as inp_handle:
        compounds_raw = [line.strip("\n") for line in inp_handle]
    compounds_raw = [compound for compound in compounds_raw if compound]

    if encoded:
        compounds = [convert_from_coded_id(i)[0] for i in compounds_raw]
    else:
        compounds = compounds_raw

    # A dict keeps one entry per compound, in first-seen order.
    res_dict = {cpd: family_dict.get(cpd, ["Others"]) for cpd in compounds}

    with open(out_file, "w") as out_handle:
        for cpd, families in res_dict.items():
            out_handle.write(cpd + '\t' + ",".join(families) + '\n')
def create_species_sbml(metabolites, outputfile):
    """Write a SBML file holding one species per metabolite of the input.

    Args:
        metabolites (set): set of metabolites (coded ids, possibly wrapped
            in double quotes)
        outputfile (str): SBML file to be written
    """
    document = libsbml.SBMLDocument(2, 1)
    model = document.createModel("metabolites")

    for raw_id in metabolites:
        species_id = raw_id.strip('"')
        name, _, comp = convert_from_coded_id(species_id)

        species = model.createSpecies()
        sbmlGenerator.check(species, 'create species')
        sbmlGenerator.check(species.setId(species_id), 'set species id')

        # Name and compartment are only set when the decoding found them.
        if name is None:
            logger.warning("No name for " + species_id)
        else:
            sbmlGenerator.check(species.setName(name), 'set species name')

        if comp is None:
            logger.warning("No compartment for " + species_id)
        else:
            sbmlGenerator.check(species.setCompartment(comp),
                                'set species compartment')

    libsbml.writeSBMLToFile(document, outputfile)
def test_extract_rxn_with_gene_assoc_cli():
    """Run the padmet command line chain and check that a reaction without
    gene association is dropped by extract_rxn_with_gene_assoc."""
    commands = (
        [
            'padmet', 'pgdb_to_padmet', '--pgdb', 'test_data/pgdb',
            '--output', 'test.padmet', '--extract-gene'
        ],
        [
            'padmet', 'sbmlGenerator', '--padmet', 'test.padmet',
            '--output', 'fabo.sbml'
        ],
        # Extract reactions with only genes association so 2.3.1.49-RXN
        # should not be here.
        [
            'padmet', 'extract_rxn_with_gene_assoc', '--sbml', 'fabo.sbml',
            '--output', 'fabo_rxn_with_genes.sbml'
        ],
    )
    for command in commands:
        subprocess.call(command)

    sbml_reader = libsbml.SBMLReader()
    sbml_document = sbml_reader.readSBML('fabo_rxn_with_genes.sbml')
    sbml_model = sbml_document.getModel()
    id_reactions = []
    for sbml_reaction in sbml_model.getListOfReactions():
        id_reactions.append(
            sbmlPlugin.convert_from_coded_id(sbml_reaction.id)[0])

    # Clean up before asserting so the files are removed on failure too.
    for created_file in ('test.padmet', 'fabo.sbml',
                         'fabo_rxn_with_genes.sbml'):
        os.remove(created_file)

    assert '2.3.1.49-RXN' not in id_reactions
def compare_sbml_padmet(sbml_document, padmet):
    """
    Compare the reaction ids of a sbml document with those of a padmet and
    print the number of reactions on each side, followed by the ids found
    in only one of the two. Nothing is returned.

    Parameters
    ----------
    sbml_document: libsbml.document
        sbml document
    padmet: padmet.classes.PadmetSpec
        padmet to compare with
    """
    sbml_model = sbml_document.getModel()
    sbml_rxn_ids = {
        sp.convert_from_coded_id(rxn.getId())[0]
        for rxn in sbml_model.getListOfReactions()
    }
    padmet_rxn_ids = {
        node.id for node in padmet.dicOfNode.values()
        if node.type == "reaction"
    }

    only_in_sbml = sbml_rxn_ids - padmet_rxn_ids
    only_in_padmet = padmet_rxn_ids - sbml_rxn_ids

    print("%s reaction in sbml" % len(sbml_rxn_ids))
    print("%s reaction in padmet" % len(padmet_rxn_ids))

    print("%s reaction in sbml not in padmet" % len(only_in_sbml))
    for rxn_id in only_in_sbml:
        print("\t%s" % rxn_id)

    print("%s reaction in padmet not in sbml" % len(only_in_padmet))
    for rxn_id in only_in_padmet:
        print("\t%s" % rxn_id)
def test_m2m_recon_call():
    """
    Test m2m recon when called in terminal.

    Both the SBML and the padmet written by the reconstruction must
    contain the expected reactions.
    """
    delete_pgdb = ['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc']
    recon = [
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '-p'
    ]
    subprocess.call(delete_pgdb)
    subprocess.call(recon)

    sbml_document = SBMLReader().readSBML(
        'recon_data_output/sbml/fatty_acid_beta_oxydation_I.sbml')
    sbml_rxn_ids = set()
    for sbml_reaction in sbml_document.getModel().getListOfReactions():
        sbml_rxn_ids.add(convert_from_coded_id(sbml_reaction.getId())[0])
    assert set(fabo_reactions()).issubset(sbml_rxn_ids)

    padmet = PadmetSpec(
        'recon_data_output/padmet/fatty_acid_beta_oxydation_I.padmet')
    padmet_rxn_ids = set()
    for node in padmet.dicOfNode.values():
        if node.type == "reaction":
            padmet_rxn_ids.add(node.id)
    assert set(fabo_reactions()).issubset(padmet_rxn_ids)

    shutil.rmtree('recon_data_output')
    subprocess.call(delete_pgdb)
def create_sbml_stat(species_name, sbml_file):
    """Extract reactions/compounds/genes from a sbml file.

    Args:
        species_name (str): species names
        sbml_file (str): path to a sbml file

    Returns:
        list: [species name, list of genes, list of reactions, list of
        reactions associated with genes, list of compounds]

    Raises:
        ValueError: if the root of the file has no model element
    """
    tree = etree.parse(sbml_file)
    sbml = tree.getroot()

    model_element = None
    for e in sbml:
        # Drop the "{namespace}" prefix, if any, to compare the bare name.
        if e.tag.rpartition("}")[2] == "model":
            model_element = e
    if model_element is None:
        raise ValueError("No model element found in %s" % sbml_file)

    genes = []
    reactions = []
    gene_associated_rxns = []
    compounds = []

    for els in model_element:
        if 'listOfSpecies' in els.tag:
            for el in els:
                compounds.append(
                    sbmlPlugin.convert_from_coded_id(el.get('metaid'))[0])
        if 'listOfReactions' in els.tag:
            for el in els:
                reaction_id = sbmlPlugin.convert_from_coded_id(
                    el.get('id'))[0]
                reactions.append(reaction_id)
                # Iterate the elements directly: getchildren() is
                # deprecated in lxml and no longer exists in xml.etree.
                for subel in el:
                    if 'notes' not in subel.tag:
                        continue
                    for subsubel in subel:
                        for subsubsubel in subsubel:
                            note_text = subsubsubel.text
                            # An element without text has .text set to None.
                            if (not note_text
                                    or 'GENE_ASSOCIATION' not in note_text):
                                continue
                            for gene in sbmlPlugin.parseGeneAssoc(note_text):
                                genes.append(
                                    gene.replace('GENE_ASSOCIATION:', ''))
                            if reaction_id not in gene_associated_rxns:
                                gene_associated_rxns.append(reaction_id)

    return [species_name, genes, reactions, gene_associated_rxns, compounds]
def test_m2m_recon_call():
    """
    Test m2m recon when called in terminal.

    The reconstruction is run twice: once with '-p' (padmet output), where
    the decoded reaction ids of the SBML and the padmet are checked, and
    once with '--pwt-xml', where the known reaction ids are searched in the
    reaction names of the SBML.
    """
    sbml_file_path = os.path.join('recon_data_output', 'sbml',
                                  'fatty_acid_beta_oxydation_I.sbml')
    padmet_path = os.path.join('recon_data_output', 'padmet',
                               'fatty_acid_beta_oxydation_I.padmet')

    subprocess.call(['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc'])
    subprocess.call([
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '-p'
    ])

    reader = SBMLReader()
    document = reader.readSBML(sbml_file_path)
    expected_fabo_reactions = [
        convert_from_coded_id(reaction.getId())[0]
        for reaction in document.getModel().getListOfReactions()
    ]
    assert set(get_fabo_reactions()).issubset(set(expected_fabo_reactions))

    padmet = PadmetSpec(padmet_path)
    fabo_rxns = [
        node.id for node in padmet.dicOfNode.values()
        if node.type == "reaction"
    ]
    assert set(get_fabo_reactions()).issubset(set(fabo_rxns))

    shutil.rmtree('recon_data_output')

    subprocess.call([
        'm2m', 'recon', '-g', 'recon_data', '-o', 'recon_data_output', '-c',
        '1', '--pwt-xml'
    ])

    reader = SBMLReader()
    document = reader.readSBML(sbml_file_path)
    # Extract reaction ID from annotation.
    fabo_reactions = [
        reaction.name
        for reaction in document.getModel().getListOfReactions()
    ]

    # Record an explicit True/False for every known reaction: storing only
    # the reactions that were found would make the assertion below pass
    # even when reactions are missing (or when none is found at all).
    results = {
        known_fabo_reaction: any(known_fabo_reaction in fabo_reaction
                                 for fabo_reaction in fabo_reactions)
        for known_fabo_reaction in get_fabo_reactions()
    }
    missing_reactions = [rxn for rxn, found in results.items() if not found]
    assert all(results.values()), missing_reactions

    shutil.rmtree('recon_data_output')
    subprocess.call(['mpwt', '--delete', 'fatty_acid_beta_oxydation_icyc'])
def fba_on_targets(allspecies, model):
    """
    For each species in allspecies, add to a copy of the model a reaction
    consuming only this species, use it as the objective, optimize and
    print whether a flux was obtained.

    Parameters
    ----------
    allspecies: list
        list of species to test (each one must expose an ``id``)
    model: cobra.model
        Cobra model from a sbml file
    """
    for species in allspecies:
        # Work on a copy so the input model is left untouched.
        working_model = model.copy()

        # Switch off the objective currently defined in the model.
        for reaction in working_model.reactions:
            if reaction.objective_coefficient == 1.0:
                reaction.objective_coefficient = 0.0

        # New objective: a reaction that only consumes the tested species.
        sink = Reaction("FBA_TEST")
        sink.lower_bound = 0
        sink.upper_bound = 1000
        working_model.add_reactions([sink])
        sink.objective_coefficient = 1.0
        sink.add_metabolites({species: -1.0})

        solution = working_model.optimize()
        if solution.objective_value > 1e-5:
            template = "%s // %s %s positive"
        else:
            template = "%s // %s %s NULL"

        decoded = convert_from_coded_id(species.id)
        print(template % (species, decoded[0] + "_" + decoded[2],
                          solution.objective_value))
def test_sbmlPlugin():
    """Check convert_from_coded_id on coded reaction and species ids."""
    species_options = {"pattern": "_", "species_tag": "S"}
    # (coded id, extra keyword arguments, expected (id, type, compartment))
    cases = [
        ("R_RXN__45__11921", {}, ("RXN-11921", "R", None)),
        ("R_R00332_c", {}, ("R00332", "R", "c")),
        ("R_SUCCt2_2", {}, ("SUCCt2_2", "R", None)),
        (
            "S_N6_45__40_L_45_1_44_3_45_Dicarboxypropyl_41__45_L_45_lysine_c",
            species_options,
            ("N6-(L-1,3-Dicarboxypropyl)-L-lysine", "S", "c"),
        ),
        (
            "S__40_2R_41__45_2_45_Hydroxy_45_3_45__40_phosphonooxy_41__45_propanal_c",
            species_options,
            ("(2R)-2-Hydroxy-3-(phosphonooxy)-propanal", "S", "c"),
        ),
        ("M_citr_L_m", {}, ("citr_L", "M", "m")),
    ]
    for coded, options, expected in cases:
        assert sbmlPlugin.convert_from_coded_id(coded, **options) == expected
def test_extract_rxn_with_gene_assoc():
    """Check that extract_rxn_with_gene_assoc drops a reaction that has no
    gene association."""
    full_sbml = 'fabo.sbml'
    filtered_sbml = 'fabo_rxn_with_genes.sbml'

    padmet_to_sbml(
        from_pgdb_to_padmet('test_data/pgdb', extract_gene=True), full_sbml)

    # Extract reactions with only genes association so 2.3.1.49-RXN should
    # not be here.
    extract_rxn_with_gene_assoc(full_sbml, filtered_sbml, verbose=False)

    sbml_reader = libsbml.SBMLReader()
    sbml_document = sbml_reader.readSBML(filtered_sbml)
    sbml_model = sbml_document.getModel()
    id_reactions = []
    for sbml_reaction in sbml_model.getListOfReactions():
        id_reactions.append(
            sbmlPlugin.convert_from_coded_id(sbml_reaction.id)[0])

    # Clean up before asserting so the files are removed on failure too.
    os.remove(full_sbml)
    os.remove(filtered_sbml)

    assert '2.3.1.49-RXN' not in id_reactions
def updateFromSbml(self, sbml_file, verbose=False):
    """
    Fill the padmet with the species and reactions of a sbml file.

    Every species becomes a compound node whose id is decoded with
    sbmlPlugin and whose name, when there is one, is stored in COMMON-NAME.
    Every reaction not already in the padmet becomes a reaction node with
    its DIRECTION (and COMMON-NAME when named); its reactants and products
    are linked with consumes/produces relations carrying the stoichiometry
    and the compartment (set to 'c' when the decoded id has none).

    Parameters
    ----------
    sbml_file: str
        pathname of the sbml file
    verbose: bool
        if True print supp info

    Raises
    ------
    FileNotFoundError
        if sbml_file does not exist
    """
    if not os.path.exists(sbml_file):
        raise FileNotFoundError("No SBML file accessible at " + sbml_file)

    # using libSbml to read sbml_file
    if verbose:
        print("loading sbml file: %s" % sbml_file)
    sbml_reader = libsbml.SBMLReader()
    document = sbml_reader.readSBML(sbml_file)
    for error_index in range(document.getNumErrors()):
        print(document.getError(error_index).getMessage())

    model = document.getModel()
    all_species = model.getListOfSpecies()
    all_reactions = model.getListOfReactions()
    if verbose:
        print("nb species: %s" % len(all_species))
        print("nb reactions: %s" % len(all_reactions))
        print("creating species")

    for specie in all_species:
        coded_specie_id = specie.getId()
        specie_id = sbmlPlugin.convert_from_coded_id(coded_specie_id)[0]
        if verbose:
            print("specie: %s, uncoded: %s" % (coded_specie_id, specie_id))
        if specie_id in self.dicOfNode:
            if verbose:
                print("already in padmetRef")
            continue
        specie_name = specie.getName()
        if specie_name:
            self.dicOfNode[specie_id] = Node(
                "compound", specie_id, {"COMMON-NAME": [specie_name]})
        else:
            self.dicOfNode[specie_id] = Node("compound", specie_id)

    if verbose:
        print("creating reactions")
    for reaction in all_reactions:
        coded_reaction_id = reaction.getId()
        reaction_id = sbmlPlugin.convert_from_coded_id(coded_reaction_id)[0]
        if verbose:
            print("reaction: %s, uncoded: %s" %
                  (coded_reaction_id, reaction_id))
        if reaction_id in self.dicOfNode:
            if verbose:
                print("already in padmetRef")
            continue

        reaction_name = reaction.getName()
        direction = "REVERSIBLE" if reaction.getReversible() else "LEFT-TO-RIGHT"
        if reaction_name:
            reaction_misc = {
                "COMMON-NAME": [reaction_name],
                "DIRECTION": [direction],
            }
        else:
            reaction_misc = {"DIRECTION": [direction]}
        self.dicOfNode[reaction_id] = Node("reaction", reaction_id,
                                           reaction_misc)

        # Reactants first, then products; both sides get the same treatment.
        sides = (
            ("consumes", reaction.getListOfReactants),
            ("produces", reaction.getListOfProducts),
        )
        for relation_type, get_participants in sides:
            for participant in get_participants():
                compound_id, _, compart = sbmlPlugin.convert_from_coded_id(
                    participant.getSpecies())
                if compart is None:
                    if verbose:
                        print("%s has no compart, set to 'c'" % participant)
                    compart = "c"
                stoichiometry = participant.getStoichiometry()
                self._addRelation(
                    Relation(
                        reaction_id,
                        relation_type,
                        compound_id,
                        {
                            "STOICHIOMETRY": [stoichiometry],
                            "COMPARTMENT": [compart],
                        },
                    ))
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add its reactions in padmet.
    if with_genes: add also genes information

    Only the reactions whose name is not the id of a reaction node of the
    padmet are added. Compound (and gene) nodes are created when missing.
    The consumes/produces/is_linked_to relations are appended to
    ``list_of_relation``, which is not defined in this function
    (presumably a module-level list -- confirm against the rest of the
    file).

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance
    with_genes: bool
        if true also add genes information.
    verbose: bool
        if True print the progression

    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb + metabolic-reactions.xml data
    """
    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())

    model = document.getModel()
    listOfReactions = model.getListOfReactions()
    # Recover the reactions that are not in the basic metacyc but in the
    # sbml file. The reaction names are used instead of the ids because the
    # ids are encoded, the name is the non-encoded version of the id.
    padmet_reactions_id = {
        node.id for node in padmet.dicOfNode.values()
        if node.type == "reaction"
    }
    reaction_to_add = [
        reaction for reaction in listOfReactions
        if reaction.getName() not in padmet_reactions_id
    ]
    if verbose:
        print(str(len(reaction_to_add))+" reactions to add")

    for count, reactionSBML in enumerate(reaction_to_add, start=1):
        reaction_id = reactionSBML.getName()
        if verbose:
            print(str(count)+"/"+str(len(reaction_to_add))+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        # A node with this id but another type may already exist: keep it.
        if reaction_id not in padmet.dicOfNode:
            padmet.dicOfNode[reaction_id] = Node(
                "reaction", reaction_id, {"DIRECTION": [reaction_dir]})

        for reactant in reactionSBML.getListOfReactants():
            # convert ids
            reactant_id, _type, reactant_compart = sbmlPlugin.convert_from_coded_id(reactant.getSpecies())
            if reactant_id not in padmet.dicOfNode:
                # The compound node is stored under the compound id; storing
                # it under reaction_id would replace the reaction node.
                padmet.dicOfNode[reactant_id] = Node("compound", reactant_id)
            reactant_stoich = reactant.getStoichiometry()
            consumes_rlt = Relation(
                reaction_id, "consumes", reactant_id,
                {"STOICHIOMETRY": [reactant_stoich],
                 "COMPARTMENT": [reactant_compart]})
            list_of_relation.append(consumes_rlt)

        for product in reactionSBML.getListOfProducts():
            product_id, _type, product_compart = sbmlPlugin.convert_from_coded_id(product.getSpecies())
            if product_id not in padmet.dicOfNode:
                padmet.dicOfNode[product_id] = Node("compound", product_id)
            product_stoich = product.getStoichiometry()
            produces_rlt = Relation(
                reaction_id, "produces", product_id,
                {"STOICHIOMETRY": [product_stoich],
                 "COMPARTMENT": [product_compart]})
            list_of_relation.append(produces_rlt)

        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in notes:
                # Using sbmlPlugin to recover all genes associated to the
                # reaction
                listOfGenes = sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0])
                for gene in listOfGenes:
                    # A missing key raises KeyError (not TypeError), so test
                    # the membership explicitly before creating the node.
                    if gene not in padmet.dicOfNode:
                        padmet.dicOfNode[gene] = Node("gene", gene)
                    is_linked_rlt = Relation(reaction_id, "is_linked_to", gene)
                    list_of_relation.append(is_linked_rlt)
    return padmet
def sbml_to_curation(sbml_file, rxn_list, output, extract_gene=False, comment="N.A", verbose=False):
    """
    Read a sbml file, check that each reaction id is in the sbml, then
    create the curation form for these reactions.
    This form can then be used with manual_curation.py

    For every reaction the form holds the decoded reaction id, the comment,
    the reversibility, the linked genes and one line per reactant/product
    written as stoichio:compound_id:compart. A compound whose decoded id has
    no compartment is written with compartment 'c'.

    Parameters
    ----------
    sbml_file: str
        path to sbml file
    rxn_list: list
        list of reaction id, ids must be identical to the ones in the sbml,
        careful with encoded ids.
    output: str
        path to the form to create
    extract_gene: bool
        if true extract genes association
    comment: str
        Comment why the reaction will be added in the network for
        traceability.
    verbose: bool
        if True print information

    Raises
    ------
    FileNotFoundError
        if sbml_file does not exist
    ValueError
        if a reaction of rxn_list is not in the sbml file
    """
    if not os.path.exists(sbml_file):
        raise FileNotFoundError(
            "No SBML file (--sbml/sbml_file) accessible at " + sbml_file)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_file)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()

    # check if reactions id are in model.
    if verbose:
        print("Check if reaction(s) are in sbml file")
    # Built once: the ids do not change between two lookups.
    sbml_rxn_ids = {r.id for r in listOfReactions}
    for rxn_id in rxn_list:
        if rxn_id not in sbml_rxn_ids:
            raise ValueError("/!\\ reaction %s not found" % rxn_id)
        if verbose:
            print("reaction %s found" % rxn_id)

    # create form output
    with open(output, 'w') as f:
        for rxn_id in rxn_list:
            rxn_sbml = listOfReactions.getElementBySId(rxn_id)
            rxn_id_decoded = convert_from_coded_id(rxn_id)[0]
            if verbose:
                print("extracting reaction %s, decoded id as %s" %
                      (rxn_id, rxn_id_decoded))

            # check if have gene assoc
            gene_assoc = ""
            if extract_gene:
                try:
                    gene_assoc = parseNotes(rxn_sbml)["GENE_ASSOCIATION"][0]
                except KeyError:
                    gene_assoc = ""

            header_lines = [
                ["reaction_id", rxn_id_decoded],
                ["comment", comment],
                ["reversible", "true" if rxn_sbml.reversible else "false"],
                ["linked_gene", gene_assoc],
                ["#reactant/product", "#stoichio:compound_id:compart"],
            ]
            for line in header_lines:
                f.write("\t".join(line) + "\n")

            sides = (
                ("reactant", rxn_sbml.getListOfReactants()),
                ("product", rxn_sbml.getListOfProducts()),
            )
            for side_name, participants in sides:
                for participant in participants:
                    stoich = str(abs(participant.getStoichiometry()))
                    compound_id, _, compart = convert_from_coded_id(
                        participant.getSpecies())
                    if compart is None:
                        # join() only accepts strings: without a default the
                        # form would be left half written on a TypeError.
                        compart = "c"
                    line = ":".join([stoich, compound_id, compart])
                    f.write(side_name + "\t" + line + "\n")
            f.write("\n")
def flux_analysis(sbml_file, seeds_file=None, targets_file=None, all_species=False):
    """
    1./ Run flux balance analysis with the cobra package on an already
    defined reaction. Need to set in the sbml the value
    'objective_coefficient' to 1.
    If the reaction is reachable by flux: print the flux value and the flux
    value for each reactant of the reaction.
    If not: only print the flux value for each reactant of the reaction.
    If a reactant has a flux of '0' this means that it is not reachable by
    flux (and maybe topologically). To unblock the reaction it is required
    to fix the metabolic network by adding/removing reactions until all
    reactants are reachable.

    2./ If seeds and targets are given as sbml files with only compounds,
    will also try to use the Menetools library to make a topological
    analysis: topological reachability of the targets compounds from the
    seeds compounds.

    3./ If all_species: will test flux reachability of all the compounds in
    the metabolic network (may take several minutes), then stop.

    Parameters
    ----------
    sbml_file: str
        path to sbml file to analyse
    seeds_file: str
        path to sbml file with only compounds representing the seeds/growth
        medium
    targets_file: str
        path to sbml file with only compounds representing the targets to
        reach
    all_species: bool
        if True will try to create obj function for each compound and
        return which are reachable by flux.

    Raises
    ------
    FileNotFoundError
        if one of the given files does not exist
    SystemExit
        if no reaction of the model has an objective coefficient of 1.0
    """
    if targets_file:
        if not os.path.exists(targets_file):
            raise FileNotFoundError("No target SBML file accessible at " +
                                    targets_file)
        targets = read_sbml_model(targets_file).metabolites
    if seeds_file:
        if not os.path.exists(seeds_file):
            raise FileNotFoundError("No seeds SBML file accessible at " +
                                    seeds_file)
    if not os.path.exists(sbml_file):
        # This is the analysed model, not the targets file.
        raise FileNotFoundError("No SBML file accessible at " + sbml_file)
    model = read_sbml_model(sbml_file)

    # nb metabolites: the same compound in two compartments counts once.
    real_metabolites = {
        i.id.replace("_" + i.compartment, "") for i in model.metabolites
    }
    rxn_with_ga = [i for i in model.reactions if i.gene_reaction_rule]
    print("#############")
    print("Model summary")
    print("Number of compounds: %s" % len(real_metabolites))
    print("Number of reactions: %s" % len(model.reactions))
    print("Number of genes: %s" % len(model.genes))
    print("Ratio rxn with genes/rxns: %s%%" %
          (100 * len(rxn_with_ga) / len(model.reactions)))

    # Launch a topological analysis if menetools is installed.
    if seeds_file and targets_file:
        print("#############")
        print("Analyzing targets")
        print("#Topological analysis")
        try:
            from menetools import run_menecheck
            menetools_result = run_menecheck(draft_sbml=sbml_file,
                                             seeds_sbml=seeds_file,
                                             targets_sbml=targets_file)
            print("Number of targets: %s" % (len(targets)))
            print("Unproductible targets: " + ",".join(menetools_result[0]))
            print("Productible targets: " + ",".join(menetools_result[1]))
        except ImportError:
            print(
                "Menetools is not installed. Can't run topological analysis.")
        print("#Flux Balance Analysis")
        fba_on_targets(targets, model)

    if all_species:
        targets = model.metabolites
        print(
            "#Flux Balance Analysis on all model metabolites (long process...)"
        )
        fba_on_targets(targets, model)
        return

    objective_rxns = [
        rxn for rxn in model.reactions if rxn.objective_coefficient == 1.0
    ]
    if not objective_rxns:
        print(
            "Need to set OBJECTIVE COEFFICIENT to '1.0' for the reaction to test"
        )
        # exit() is injected by the site module and may be missing; raising
        # SystemExit directly ends the program the same way.
        raise SystemExit()
    biomassrxn = objective_rxns[0]
    biomassname = biomassrxn.id

    print("#############")
    print("Computing optimization")
    solution = model.optimize()
    print("Testing reaction %s" % biomassname)
    print("Growth rate: %s" % solution.objective_value)
    print("Status: %s" % solution.status)
    model.summary()
    if solution.objective_value > 1e-5:
        blocked = cobra_flux_analysis.find_blocked_reactions(
            model, model.reactions)
        essRxns = cobra_flux_analysis.find_essential_reactions(model)
        essGenes = cobra_flux_analysis.find_essential_genes(model)
        print('FVA analysis:')
        print('\tBlocked reactions: %s' % len(blocked))
        print('\tEssential reactions: %s' % len(essRxns))
        for rxn in essRxns:
            print(rxn.id)
        print('\tEssential genes: %s' % len(essGenes))

    # get biomass rxn reactants (negative coefficient) and products
    bms_reactants = {
        k: v for k, v in biomassrxn.metabolites.items() if v < 0
    }
    bms_products = {
        k: v for k, v in biomassrxn.metabolites.items() if v > 0
    }
    dict_output = {"positive": {}, "negative": {}}

    # for each metabolite in reactant, create a biomass rxn with only this
    # metabolite in reactants
    biomassrxn.objective_coefficient = 0.0
    for reactant, stoich in bms_reactants.items():
        test_rxn = Reaction("test_rxn")
        test_rxn.lower_bound = 0
        test_rxn.upper_bound = 1000
        metabolitedict = dict(bms_products)
        metabolitedict.update({reactant: stoich})

        model.add_reactions([test_rxn])
        test_rxn.add_metabolites(metabolitedict)
        test_rxn.objective_coefficient = 1.0

        solution = model.optimize()
        if solution.objective_value > 1e-5:
            dict_output["positive"][reactant] = solution.objective_value
        else:
            dict_output["negative"][reactant] = solution.objective_value
        # The temporary reaction is removed before testing the next one.
        model.remove_reactions([test_rxn])

    print("%s/%s compounds with positive flux" %
          (len(dict_output["positive"]), len(bms_reactants)))
    print("%s/%s compounds without flux" %
          (len(dict_output["negative"]), len(bms_reactants)))

    for k, v in dict_output["positive"].items():
        decoded = convert_from_coded_id(k.id)
        print("%s // %s %s positive" % (k, decoded[0] + "_" + decoded[2], v))
    for k, v in dict_output["negative"].items():
        decoded = convert_from_coded_id(k.id)
        print("%s // %s %s NULL" % (k, decoded[0] + "_" + decoded[2], v))
# Read the command line options: directory of SBML networks, JSON ontology
# (compound -> list of families) and output path.
args = parser.parse_args()
mn_dir = args.dir
json_family = args.json
outfile = args.output

# Decoded species ids of every metabolic network of the directory.
species_by_mn = {}
for mn in os.listdir(mn_dir):
    print(mn)
    # str.rstrip(".sbml") removes any trailing run of the characters
    # '.', 's', 'b', 'm', 'l' ("models.sbml" would become "mode"); only the
    # exact extension must be removed.
    mn_name = mn[:-len(".sbml")] if mn.endswith(".sbml") else mn
    reader = SBMLReader()
    # The document stays referenced while its model is read: a libsbml
    # model belongs to its document.
    document = reader.readSBML(os.path.join(mn_dir, mn))
    model = document.getModel()
    species = [
        convert_from_coded_id(i.getId())[0] for i in model.getListOfSpecies()
    ]
    species_by_mn[mn_name] = species

with open(json_family, "r") as f:
    family_dict = json.load(f)

# Every family of the ontology, computed once for all the networks.
all_families = {
    family for families in family_dict.values() for family in families
}

# One counter per family, initialised to 0, for each metabolic network.
families_by_mn = {}
for mn in species_by_mn:
    families_by_mn[mn] = {i: 0 for i in all_families}
def create_sbml_stat(species_name, sbml_file):
    """Extract reactions/compounds/genes from a sbml file.

    Genes are read from the GENE_ASSOCIATION notes of the reactions. When
    the notes give no gene (XML from MetaFlux), the genes referenced by the
    fbc geneProductAssociation elements are used instead; the same fallback
    applies to the list of reactions associated with genes.

    Args:
        species_name (str): species names
        sbml_file (str): path to a sbml file

    Returns:
        list: [species name, list of genes, list of reactions, list of
        reactions associated with genes, list of compounds]

    Raises:
        ValueError: if the root of the file has no model element
    """
    fbc_gene_product = '{http://www.sbml.org/sbml/level3/version1/fbc/version2}geneProduct'

    tree = etree.parse(sbml_file)
    sbml = tree.getroot()

    model_element = None
    for e in sbml:
        # Drop the "{namespace}" prefix, if any, to compare the bare name.
        if e.tag.rpartition("}")[2] == "model":
            model_element = e
    if model_element is None:
        raise ValueError("No model element found in %s" % sbml_file)

    genes = []
    reactions = []
    gene_associated_rxns = []
    fbc_gene_associated_rxns = []
    fbc_rxn_associated_genes = []
    compounds = []

    for els in model_element:
        if 'listOfSpecies' in els.tag:
            for el in els:
                compounds.append(
                    sbmlPlugin.convert_from_coded_id(el.get('metaid'))[0])
        if 'listOfReactions' in els.tag:
            for el in els:
                reaction_id = sbmlPlugin.convert_from_coded_id(
                    el.get('id'))[0]
                reactions.append(reaction_id)
                # Iterate the elements directly: getchildren() is
                # deprecated in lxml and no longer exists in xml.etree.
                for subel in el:
                    if 'notes' in subel.tag:
                        for subsubel in subel:
                            for subsubsubel in subsubel:
                                note_text = subsubsubel.text
                                # An element without text has .text None.
                                if (not note_text or
                                        'GENE_ASSOCIATION' not in note_text):
                                    continue
                                for gene in sbmlPlugin.parseGeneAssoc(
                                        note_text):
                                    # Strip the prefix before the duplicate
                                    # check: the list holds stripped names.
                                    gene = gene.replace(
                                        'GENE_ASSOCIATION:', '')
                                    if gene not in genes:
                                        genes.append(gene)
                                if reaction_id not in gene_associated_rxns:
                                    gene_associated_rxns.append(reaction_id)
                    # Use geneProductAssociation for xml from MetaFlux.
                    elif 'geneProductAssociation' in subel.tag:
                        for subsubel in subel:
                            # Either a direct geneProductRef, or a wrapper
                            # element whose children carry the references.
                            if 'geneProductRef' in subsubel.tag:
                                gene_refs = [subsubel]
                            else:
                                gene_refs = list(subsubel)
                            for gene_ref in gene_refs:
                                gene = gene_ref.get(fbc_gene_product)
                                if not gene:
                                    continue
                                gene = gene.replace('G_', '')
                                if gene not in fbc_rxn_associated_genes:
                                    fbc_rxn_associated_genes.append(gene)
                                if reaction_id not in fbc_gene_associated_rxns:
                                    fbc_gene_associated_rxns.append(
                                        reaction_id)

    # For XML from MetaFlux, use genes from geneProductAssociation to get
    # genes and reaction with genes.
    if not genes and fbc_rxn_associated_genes:
        genes = fbc_rxn_associated_genes
    if not gene_associated_rxns and fbc_gene_associated_rxns:
        gene_associated_rxns = fbc_gene_associated_rxns

    return [species_name, genes, reactions, gene_associated_rxns, compounds]