def padmet_medium_cli(command_args):
    """CLI entry point: display or update the growth medium of a padmet network.

    With no -r flag and no --seeds file, the current growth medium is printed.
    Otherwise the medium is updated (set from seeds, or removed) and the
    padmet file is rewritten to --output (defaults to --padmetSpec in place).
    """
    args = docopt.docopt(__doc__, argv=command_args)

    seeds_path = args["--seeds"]
    if seeds_path:
        if not os.path.exists(seeds_path):
            raise FileNotFoundError("No seeds file (--seeds) accessible at " + seeds_path)
        # First tab-separated column of each line holds the seed compound id.
        with open(seeds_path, 'r') as seeds_handle:
            seeds = [row.split("\t")[0] for row in seeds_handle.read().splitlines()]
    else:
        seeds = None

    padmet = PadmetSpec(args["--padmetSpec"])
    padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    verbose = args["-v"]
    remove = args["-r"]
    output = args["--output"] if args["--output"] is not None else args["--padmetSpec"]

    if remove or seeds:
        # Update the medium, then persist the (possibly in-place) padmet file.
        manage_medium(padmet, seeds, padmetRef, verbose)
        padmet.generateFile(output)
    else:
        # No action requested: just report the current growth medium.
        current_medium = padmet.get_growth_medium()
        print("List of growth medium:")
        if current_medium:
            print(list(current_medium))
        else:
            print("[]")
def create_padmet_instance(padmet_file, padmet_type, db, version, padmetRef=None):
    """Build a fresh PadmetRef or PadmetSpec with its policy and metadata set.

    Parameters
    ----------
    padmet_file: str
        path associated with the padmet (kept for interface compatibility;
        not read by this function)
    padmet_type: str
        either "PadmetRef" or "PadmetSpec"
    db: str
        database name stored in the padmet info (used when no padmetRef given)
    version: str
        database version stored in the padmet info (used when no padmetRef given)
    padmetRef: PadmetRef, optional
        when given, info and policy are copied from it

    Returns
    -------
    PadmetRef or PadmetSpec
        the initialized padmet instance

    Raises
    ------
    TypeError
        if padmet_type is not one of the two supported values
    """
    if padmet_type not in ["PadmetRef", "PadmetSpec"]:
        raise TypeError('padmet_type must be in ["PadmetRef","PadmetSpec"], given:%s' %padmet_type)
    today_date = datetime.now().strftime("%Y-%m-%d")

    padmet = PadmetSpec() if padmet_type == "PadmetSpec" else PadmetRef()

    if padmetRef:
        # Inherit metadata and policy from the reference database.
        padmet.setInfo(padmetRef)
        padmet.info["PADMET"]["creation"] = today_date
        padmet.setPolicy(padmetRef)
        return padmet

    # No reference given: install the default padmet relation policy.
    default_policy = [
        ['class', 'is_a_class', 'class'],
        ['class', 'has_name', 'name'],
        ['class', 'has_xref', 'xref'],
        ['class', 'has_suppData', 'suppData'],
        ['compound', 'is_a_class', 'class'],
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'is_a_class', 'class'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'is_a_class', 'class'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'is_a_class', 'class'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['protein', 'is_in_species', 'class'],
        ['reaction', 'is_a_class', 'class'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y'],
    ]
    db_notes = {"PADMET": {"creation": today_date, "version": "2.6"},
                "DB_info": {"DB": db, "version": version}}
    padmet.setInfo(db_notes)
    padmet.setPolicy(default_policy)
    return padmet
def manual_curation_cli(command_args):
    """CLI entry point: generate a curation template or apply a curation form.

    With --template_new_rxn / --template_add_delete_rxn, only writes the
    corresponding empty template. Otherwise sniffs the --data form type and
    applies it (reaction creation or add/delete) to --padmetSpec, writing the
    result to --output (defaults to --padmetSpec in place).
    """
    args = docopt.docopt(__doc__, argv=command_args)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]
    if data_file:
        if not os.path.exists(data_file):
            raise FileNotFoundError("No form curation file (--data/data_file) accessible at " + data_file)
        # The curation source is named after the data file itself.
        source = os.path.splitext(os.path.basename(data_file))[0]
        category = args["--category"]
        tool = args["--tool"]
    if args["--template_new_rxn"]:
        template_new_rxn(args["--template_new_rxn"])
    elif args["--template_add_delete_rxn"]:
        template_add_delete(args["--template_add_delete_rxn"])
    else:
        padmetSpec = PadmetSpec(args["--padmetSpec"])
        if not output:
            output = args["--padmetSpec"]
        padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
        # Decide which curation action the form describes.
        to_do = sniff_datafile(data_file)
        if to_do == "rxn_creator":
            rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
        elif to_do == "add_delete_rxn":
            add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
def add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet_path, only_complete_pathways=True):
    """Copy spontaneous reactions from a padmetRef into a padmet network.

    A reaction is considered spontaneous when its padmetRef node carries a
    "SPONTANEOUS" entry in node.misc. For every pathway of the network that
    contains such a reaction, the reaction is copied in; when
    only_complete_pathways is True, copies are made only if the missing
    reactions of that pathway are ALL spontaneous (i.e. adding them completes
    the pathway).

    Parameters
    ----------
    padmet_path: str
        path to the padmet network to complete
    padmet_ref_path: str
        path to the padmetRef database listing spontaneous reactions
    output_padmet_path: str
        path of the padmet file to write
    only_complete_pathways: bool
        if True, only add spontaneous reactions that complete a pathway
    """
    number_spontaneous_reactions = 0
    padmetSpec = PadmetSpec(padmet_path)
    padmetRef = PadmetRef(padmet_ref_path)

    all_spontaneous_rxns = set([node.id for node in list(padmetRef.dicOfNode.values())
                                if node.type == "reaction" and "SPONTANEOUS" in node.misc])
    # Reaction ids already present in the network. Maintained incrementally as
    # reactions are copied, instead of being recomputed from every node at each
    # loop turn (previously O(n^2)).
    spec_rxn_ids = set([node.id for node in list(padmetSpec.dicOfNode.values())
                        if node.type == "reaction"])

    for spontaneous_rxn_id in all_spontaneous_rxns:
        # BUGFIX: default to [] (was None) so a spontaneous reaction without
        # any relation no longer makes the comprehension raise TypeError.
        in_pwys = set([rlt.id_out for rlt in padmetRef.dicOfRelationIn.get(spontaneous_rxn_id, [])
                       if rlt.type == "is_in_pathway"])
        for pwy_id in in_pwys:
            # Only consider pathways that exist in the network being completed.
            if pwy_id in padmetSpec.dicOfNode:
                padmet_ref_in_rxns = set([rlt.id_in for rlt in padmetRef.dicOfRelationOut.get(pwy_id, [])
                                          if rlt.type == "is_in_pathway"])
                padmet_spec_in_rxns = set([rlt.id_in for rlt in padmetSpec.dicOfRelationOut.get(pwy_id, [])
                                           if rlt.type == "is_in_pathway"])
                if only_complete_pathways:
                    difference_rxns = padmet_ref_in_rxns.difference(padmet_spec_in_rxns)
                    # Copy only when every missing reaction is spontaneous,
                    # i.e. the copies complete the pathway.
                    if difference_rxns and difference_rxns.issubset(all_spontaneous_rxns):
                        for difference_rxn in difference_rxns:
                            if difference_rxn not in spec_rxn_ids:
                                padmetSpec.copyNode(padmetRef, difference_rxn)
                                spec_rxn_ids.add(difference_rxn)
                                number_spontaneous_reactions += 1
                else:
                    if spontaneous_rxn_id not in spec_rxn_ids:
                        padmetSpec.copyNode(padmetRef, spontaneous_rxn_id)
                        spec_rxn_ids.add(spontaneous_rxn_id)
                        number_spontaneous_reactions += 1

    padmetSpec.generateFile(output_padmet_path)
    print('Add {0} spontaneous reactions to {1}'.format(number_spontaneous_reactions, output_padmet_path))
def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Create a sbml starting with the desired reactions.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): list of reactions to be retrieved
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, remove cofactor relations from the copied reactions
    """
    reference = PadmetRef(padmet_file)
    network = PadmetSpec(empty_padmet)

    # Decode the sbml-encoded ids, then copy every requested reaction node
    # from the reference into the new network.
    decoded_rxn_ids = [convert_from_coded_id(encoded)[0] for encoded in reaction_list]
    for rxn_id in decoded_rxn_ids:
        network.copyNode(reference, rxn_id)

    decoded_cofactors = [convert_from_coded_id(encoded)[0] for encoded in COFACTORS]
    if del_cof:
        # Strip every relation that links a copied reaction to a cofactor.
        for rxn_id in decoded_rxn_ids:
            cofactor_relations = [rlt for rlt in network.dicOfRelationIn[rxn_id]
                                  if rlt.id_out in decoded_cofactors]
            for relation in cofactor_relations:
                network._delRelation(relation)

    padmet_to_sbml(network, sbml_output, sbml_lvl=3, verbose=True)
def sbml_to_padmetRef(sbml, padmetRef_file, output=None, db="NA", version="NA", verbose=False):
    """Update (or bootstrap) a PadmetRef database from one or several SBML files.

    `sbml` is either a directory (every .sbml/.xml file inside is used) or a
    ';'-separated list of file paths. If `padmetRef_file` already exists it is
    loaded and updated; otherwise a fresh PadmetRef is created with the given
    db/version metadata. The result is written to `output`, which defaults to
    `padmetRef_file`.
    """
    output = padmetRef_file if output is None else output

    if os.path.isdir(sbml):
        # Pick every sbml/xml file sitting directly in the folder.
        sbml_files = [os.path.join(sbml, name) for name in next(os.walk(sbml))[2]
                      if name.endswith(".sbml") or name.endswith(".xml")]
    else:
        sbml_files = sbml.split(";")

    if os.path.isfile(padmetRef_file):
        padmet_to_update = PadmetRef(padmetRef_file)
    else:
        padmet_to_update = create_padmet_instance(padmetRef_file, "PadmetRef", db, version)

    for sbml_file in sbml_files:
        if verbose:
            print("Updating padmet from %s" %os.path.basename(sbml_file))
        padmet_to_update.updateFromSbml(sbml_file, verbose)

    padmet_to_update.generateFile(output)
def main():
    """Script entry point: dispatch manual-curation actions from docopt arguments."""
    args = docopt.docopt(__doc__)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]
    if data_file:
        # The curation source is named after the data file itself.
        source = os.path.splitext(os.path.basename(data_file))[0]
        category = args["--category"]
        tool = args["--tool"]
    if args["--template_new_rxn"]:
        manual_curation.template_new_rxn(args["--template_new_rxn"])
    elif args["--template_add_delete_rxn"]:
        manual_curation.template_add_delete(args["--template_add_delete_rxn"])
    else:
        padmetSpec = PadmetSpec(args["--padmetSpec"])
        if not output:
            output = args["--padmetSpec"]
        padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
        # Decide which curation action the form describes.
        to_do = manual_curation.sniff_datafile(data_file)
        if to_do == "rxn_creator":
            manual_curation.rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
        elif to_do == "add_delete_rxn":
            manual_curation.add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
def enhanced_meneco_output_cli(command_args):
    """CLI wrapper: parse arguments and run enhanced_meneco_output."""
    cli_args = docopt.docopt(__doc__, argv=command_args)
    padmet_ref = PadmetRef(cli_args["--padmetRef"])
    enhanced_meneco_output(cli_args["--meneco_output"], padmet_ref,
                           cli_args["--output"], cli_args["-v"])
def main():
    """Script entry point for enhanced_meneco_output."""
    cli_args = docopt.docopt(__doc__)
    padmet_ref = PadmetRef(cli_args["--padmetRef"])
    enhanced_meneco_output.enhanced_meneco_output(cli_args["--meneco_output"], padmet_ref,
                                                  cli_args["--output"], cli_args["-v"])
def main():
    """Script entry point for compare_padmet."""
    cli_args = docopt.docopt(__doc__)
    # padmetRef is optional: pass None when not provided on the command line.
    padmet_ref = PadmetRef(cli_args["--padmetRef"]) if cli_args["--padmetRef"] else None
    compare_padmet.compare_padmet(cli_args["--padmet"], cli_args["--output"],
                                  padmet_ref, cli_args["-v"])
def main():
    """Script entry point for padmet_to_padmet."""
    cli_args = docopt.docopt(__doc__)
    # padmetRef is optional: pass None when not provided on the command line.
    padmet_ref = PadmetRef(cli_args["--padmetRef"]) if cli_args["--padmetRef"] else None
    padmet_to_padmet.padmet_to_padmet(cli_args["--to_add"], cli_args["--output"],
                                      padmet_ref, cli_args["-v"])
def compare_padmet_cli(command_args):
    """CLI wrapper: parse arguments and run compare_padmet."""
    cli_args = docopt.docopt(__doc__, argv=command_args)
    # padmetRef is optional: pass None when not provided on the command line.
    padmet_ref = PadmetRef(cli_args["--padmetRef"]) if cli_args["--padmetRef"] else None
    compare_padmet(cli_args["--padmet"], cli_args["--output"], padmet_ref,
                   cli_args["-v"], cli_args["--cpu"])
def main():
    """Script entry point for wikiGenerator.

    Files to upload: folder genomic_data, all sbml in output ortho, annot,
    external, seeds, targets.
    """
    cli_args = docopt.docopt(__doc__)
    # padmetRef is optional: pass None when not provided on the command line.
    padmet_ref = PadmetRef(cli_args["--padmetRef"]) if cli_args["--padmetRef"] else None
    wikiGenerator.wikiGenerator(cli_args["--padmet"], cli_args["--output"],
                                cli_args["--wiki_id"], padmet_ref,
                                cli_args["--database"], cli_args["--log_file"],
                                cli_args["-v"])
def sbmlGenerator_cli(command_args):
    """CLI dispatcher: export a padmet network, a reaction list, or a compound
    list to SBML, depending on which option was supplied."""
    args = docopt.docopt(__doc__, argv=command_args)
    output = args["--output"]
    verbose = args["-v"]

    if args["--padmet"]:
        # Whole-network export, optionally restricted to one reconstruction source.
        if args["--init_source"]:
            from_init_source(args["--padmet"], args["--init_source"], output, verbose)
        else:
            padmet_to_sbml(args["--padmet"], output, args["--model_id"], args["--obj_fct"],
                           args["--sbml_lvl"], args["--mnx_chem_prop"], args["--mnx_chem_xref"],
                           verbose)
    elif args["--reaction"]:
        # Export only the listed reactions, resolved against the reference db.
        reaction_to_sbml(args["--reaction"], output, PadmetRef(args["--padmetRef"]), verbose)
    elif args["--compound"]:
        compound_to_sbml(args["--compound"], output, verbose)
def padmet_to_tsv(padmetSpec_file, padmetRef_file, output_dir, verbose=False):
    """Export a padmetSpec and/or padmetRef to tsv files (nodes and relations).

    One sub-folder per padmet is created in output_dir, containing
    metabolic_network.tsv plus rxn.tsv / cpd.tsv / pwy.tsv / xref.tsv (and,
    for a padmetSpec, gene.tsv and reconstructionData.tsv), followed by the
    relation files rxn_cpd.tsv, rxn_pwy.tsv, entity_xref.tsv (and, for a
    padmetSpec, rxn_gene.tsv, rxn_reconstructionData.tsv and pwy_rate.tsv
    when a padmetRef is also given).

    Parameters
    ----------
    padmetSpec_file: str
        path to the padmetSpec file (falsy to skip)
    padmetRef_file: str
        path to the padmetRef file (falsy to skip)
    output_dir: str
        output folder, created if missing
    verbose: bool
        if True print progress information
    """
    global all_rxn_nodes, all_cpd_nodes, all_pwy_nodes
    # Normalize output_dir and create it if needed.
    if not output_dir.endswith("/"):
        output_dir += "/"
    if not os.path.isdir(output_dir):
        if verbose:
            print("Creating folder %s" % output_dir)
        os.makedirs(output_dir)

    # Load padmetSpec and create its dedicated output folder.
    if padmetSpec_file:
        if verbose:
            print("Loading %s" % padmetSpec_file)
        padmetSpec = PadmetSpec(padmetSpec_file)
        padmetSpec_name = os.path.splitext(os.path.basename(padmetSpec_file))[0]
        padmetSpec_folder = output_dir + padmetSpec_name + "/"
        if not os.path.isdir(padmetSpec_folder):
            if verbose:
                print("Creating folder %s" % padmetSpec_folder)
            os.makedirs(padmetSpec_folder)

    # Load padmetRef and create its dedicated output folder.
    if padmetRef_file:
        if verbose:
            print("Loading %s" % padmetRef_file)
        padmetRef = PadmetRef(padmetRef_file)
        padmetRef_name = os.path.splitext(os.path.basename(padmetRef_file))[0]
        padmetRef_folder = output_dir + padmetRef_name + "/"
        if not os.path.isdir(padmetRef_folder):
            if verbose:
                print("Creating folder %s" % padmetRef_folder)
            os.makedirs(padmetRef_folder)

    # NODES: converting padmetRef nodes data to tsv format.
    if padmetRef_file:
        if verbose:
            print("Extracting nodes from %s" % padmetRef_name)
        with open(padmetRef_folder + "metabolic_network.tsv", 'w') as f:
            fieldnames = ["metabolic_network", "name"]
            writer = csv.writer(f, delimiter="\t")
            writer.writerow(fieldnames)
            writer.writerow([padmetRef_name, padmetRef_name])

        if verbose:
            print("\tExtracting reactions")
        all_rxn_nodes = [node for node in list(padmetRef.dicOfNode.values())
                         if node.type == "reaction"]
        if all_rxn_nodes:
            extract_nodes(padmetRef, all_rxn_nodes, "reaction",
                          padmetRef_folder + "rxn.tsv",
                          {"in@metabolic_network": [padmetRef_name]})
        if verbose:
            print("\t%s reactions" % len(all_rxn_nodes))

        if verbose:
            print("\tExtracting compounds")
        # Compounds are the targets of consumes/produces relations.
        all_cpd_nodes = set([padmetRef.dicOfNode[rlt.id_out]
                             for rlt in padmetRef.getAllRelation()
                             if rlt.type in ["consumes", "produces"]])
        if all_cpd_nodes:
            extract_nodes(padmetRef, all_cpd_nodes, "compound", padmetRef_folder + "cpd.tsv")
        if verbose:
            print("\t%s compounds" % len(all_cpd_nodes))

        if verbose:
            print("\tExtracting pathways")
        all_pwy_nodes = [node for node in list(padmetRef.dicOfNode.values())
                         if node.type == "pathway"]
        if all_pwy_nodes:
            extract_nodes(padmetRef, all_pwy_nodes, "pathway", padmetRef_folder + "pwy.tsv")
        if verbose:
            print("\t%s pathways" % len(all_pwy_nodes))

        if verbose:
            print("\tExtracting xrefs")
        all_xrefs_nodes = [node for node in list(padmetRef.dicOfNode.values())
                           if node.type == "xref"]
        if all_xrefs_nodes:
            extract_nodes(padmetRef, all_xrefs_nodes, "xref", padmetRef_folder + "xref.tsv")
        if verbose:
            print("\t%s xrefs" % len(all_xrefs_nodes))

        # RELATIONS: converting padmetRef relations data to tsv format.
        if verbose:
            print("Extracting relations from %s" % padmetRef_name)
        rxn_cpd_data = []
        rxn_pwy_data = []
        entity_xref_data = []
        if verbose:
            print("\tExtracting relations reaction-[consumes/produces]-compound")
            # BUGFIX: message said "is_inclued_in"; the relation is is_in_pathway.
            print("\tExtracting relations reaction-is_in_pathway-pathway")
            print("\tExtracting relations reactions-has_xref-xref")
        for rxn_node in all_rxn_nodes:
            rxn_id = rxn_node.id
            # all consumes/produces relations
            cp_rlt = [rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                      if rlt.type in ["consumes", "produces"]]
            rxn_cpd_data += extract_rxn_cpd(cp_rlt)
            # all is_in_pathway relations
            pwy_rlt = [rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                       if rlt.type == "is_in_pathway"]
            if pwy_rlt:
                rxn_pwy_data += extract_rxn_pwy(pwy_rlt)
            # all has_xref relations
            rxn_xref_rlt = [rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                            if rlt.type == "has_xref"]
            if rxn_xref_rlt:
                entity_xref_data += extract_entity_xref(rxn_xref_rlt, padmetRef)

        if verbose:
            print("\tExtracting relations compound-has_xref-xref")
        for cpd_node in all_cpd_nodes:
            cpd_id = cpd_node.id
            try:
                cpd_xref_rlt = [rlt for rlt in padmetRef.dicOfRelationIn[cpd_id]
                                if rlt.type == "has_xref"]
                if cpd_xref_rlt:
                    entity_xref_data += extract_entity_xref(cpd_xref_rlt, padmetRef)
            except KeyError:
                # Compound with no outgoing relation: nothing to extract.
                pass

        if verbose:
            print("\tExtracting relations pwy-has_xref-xref")
        for pwy_node in all_pwy_nodes:
            pwy_id = pwy_node.id
            try:
                pwy_xref_rlt = [rlt for rlt in padmetRef.dicOfRelationIn[pwy_id]
                                if rlt.type == "has_xref"]
                if pwy_xref_rlt:
                    entity_xref_data += extract_entity_xref(pwy_xref_rlt, padmetRef)
            except KeyError:
                pass

        if rxn_cpd_data:
            if verbose:
                print("\t\tCreating rxn_cpd.tsv")
            rxn_cpd_file(rxn_cpd_data, padmetRef_folder + "rxn_cpd.tsv")
        if rxn_pwy_data:
            if verbose:
                print("\t\tCreating rxn_pwy.tsv")
            rxn_pwy_file(rxn_pwy_data, padmetRef_folder + "rxn_pwy.tsv")
        if entity_xref_data:
            if verbose:
                print("\t\tCreating entity_xref.tsv")
            entity_xref_file(entity_xref_data, padmetRef_folder + "entity_xref.tsv")
    else:
        if verbose:
            print("No given padmetRef")
        # BUGFIX: was `[[]] * 4` (four references to ONE shared list);
        # independent empty lists are safer even though they were never mutated.
        all_rxn_nodes, all_cpd_nodes, all_pwy_nodes, all_xref_nodes = [], [], [], []

    if padmetSpec_file:
        if verbose:
            print("Extracting nodes from %s" % padmetSpec_name)
        with open(padmetSpec_folder + "metabolic_network.tsv", 'w') as f:
            fieldnames = ["metabolic_network", "name"]
            writer = csv.writer(f, delimiter="\t")
            writer.writerow(fieldnames)
            writer.writerow([padmetSpec_name, padmetSpec_name])

        if verbose:
            print("\tExtracting reactions")
        spec_rxn_nodes = [node for node in list(padmetSpec.dicOfNode.values())
                          if node.type == "reaction"]
        # BUGFIX: guard was `if all_rxn_nodes:` (padmetRef's list) — empty when
        # no padmetRef was given, silently skipping the padmetSpec export.
        if spec_rxn_nodes:
            extract_nodes(padmetSpec, spec_rxn_nodes, "reaction",
                          padmetSpec_folder + "rxn.tsv",
                          {"in@metabolic_network": [padmetSpec_name]})
        if verbose:
            print("\t%s reactions" % len(spec_rxn_nodes))

        if verbose:
            print("\tExtracting compounds")
        spec_cpd_nodes = set([padmetSpec.dicOfNode[rlt.id_out]
                              for rlt in padmetSpec.getAllRelation()
                              if rlt.type in ["consumes", "produces"]])
        # BUGFIX: guard was `if all_cpd_nodes:` (see above).
        if spec_cpd_nodes:
            extract_nodes(padmetSpec, spec_cpd_nodes, "compound", padmetSpec_folder + "cpd.tsv")
        if verbose:
            print("\t%s compounds" % len(spec_cpd_nodes))

        if verbose:
            print("\tExtracting pathways")
        spec_pwy_nodes = [node for node in list(padmetSpec.dicOfNode.values())
                          if node.type == "pathway"]
        # BUGFIX: guard was `if all_pwy_nodes:` (see above).
        if spec_pwy_nodes:
            extract_nodes(padmetSpec, spec_pwy_nodes, "pathway", padmetSpec_folder + "pwy.tsv")
        if verbose:
            print("\t%s pathways" % len(spec_pwy_nodes))

        if verbose:
            print("\tExtracting xrefs")
        spec_xrefs_nodes = [node for node in list(padmetSpec.dicOfNode.values())
                            if node.type == "xref"]
        if spec_xrefs_nodes:
            extract_nodes(padmetSpec, spec_xrefs_nodes, "xref", padmetSpec_folder + "xref.tsv")
        if verbose:
            print("\t%s xrefs" % len(spec_xrefs_nodes))

        if verbose:
            print("\tExtracting all genes")
        spec_genes_nodes = [node for node in list(padmetSpec.dicOfNode.values())
                            if node.type == "gene"]
        if spec_genes_nodes:
            extract_nodes(padmetSpec, spec_genes_nodes, "gene",
                          padmetSpec_folder + "gene.tsv",
                          opt_col={"in@metabolic_network": [padmetSpec_name]})
        if verbose:
            print("\t%s genes" % len(spec_genes_nodes))

        if verbose:
            print("\tExtracting all reconstructionData")
        spec_recData_nodes = [node for node in list(padmetSpec.dicOfNode.values())
                              if node.type == "reconstructionData"]
        # BUGFIX: guard was `if spec_genes_nodes:` — copy/paste slip that tied
        # reconstructionData export to the presence of genes.
        if spec_recData_nodes:
            extract_nodes(padmetSpec, spec_recData_nodes, "reconstructionData",
                          padmetSpec_folder + "reconstructionData.tsv")
        if verbose:
            print("\t%s reconstructionData" % len(spec_recData_nodes))

        # RELATIONS: converting padmetSpec relations data to tsv format.
        if verbose:
            print("Extracting relations from %s" % padmetSpec_name)
        rxn_cpd_data = []
        rxn_pwy_data = []
        rxn_gene_data = []
        entity_xref_data = []
        rxn_rec_data = []
        if verbose:
            print("\tExtracting relations reaction-[consumes/produces]-compound")
            print("\tExtracting relations reaction-is_in_pathway-pathway")
            print("\tExtracting relations reactions-has_xref-xref")
            print("\tExtracting relations reactions-is_linked_to-gene")
            print("\tExtracting relations reactions-has_metadata-reconstructionData")
        for rxn_node in spec_rxn_nodes:
            rxn_id = rxn_node.id
            # all consumes/produces relations
            rxn_cpd_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                           if rlt.type in ["consumes", "produces"]]
            rxn_cpd_data += extract_rxn_cpd(rxn_cpd_rlt)
            # all is_in_pathway relations
            rxn_pwy_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                           if rlt.type == "is_in_pathway"]
            rxn_pwy_data += extract_rxn_pwy(rxn_pwy_rlt)
            # all has_xref relations
            rxn_xref_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                            if rlt.type == "has_xref"]
            entity_xref_data += extract_entity_xref(rxn_xref_rlt, padmetSpec)
            # all is_linked_to relations
            rxn_gene_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                            if rlt.type == "is_linked_to"]
            rxn_gene_data += extract_rxn_gene(rxn_gene_rlt)
            # all reconstructionData relations
            rxn_rec_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                           if rlt.type == "has_reconstructionData"]
            rxn_rec_data += extract_rxn_rec(rxn_rec_rlt)

        if verbose:
            print("\tExtracting relations compound-has_xref-xref")
        for cpd_node in spec_cpd_nodes:
            cpd_id = cpd_node.id
            try:
                cpd_xref_rlt = [rlt for rlt in padmetSpec.dicOfRelationIn[cpd_id]
                                if rlt.type == "has_xref"]
                entity_xref_data += extract_entity_xref(cpd_xref_rlt, padmetSpec)
            except KeyError:
                pass

        if padmetRef_file:
            if verbose:
                print("\tExtracting pathways's completion rate and creating pwy_rate.tsv")
            pwy_rate(padmetRef, padmetSpec, padmetSpec_name, padmetSpec_folder + "pwy_rate.tsv")
        else:
            if verbose:
                print("No padmetRef given unable to calculate pathway ratio")

        if rxn_cpd_data:
            if verbose:
                print("\t\tCreating rxn_cpd.tsv")
            rxn_cpd_file(rxn_cpd_data, padmetSpec_folder + "rxn_cpd.tsv")
        if rxn_pwy_data:
            if verbose:
                print("\t\tCreating rxn_pwy.tsv")
            rxn_pwy_file(rxn_pwy_data, padmetSpec_folder + "rxn_pwy.tsv")
        if entity_xref_data:
            if verbose:
                print("\t\tCreating entity_xref.tsv")
            entity_xref_file(entity_xref_data, padmetSpec_folder + "entity_xref.tsv")
        if rxn_gene_data:
            if verbose:
                print("\t\tCreating rxn_gene.tsv")
            rxn_gene_file(rxn_gene_data, padmetSpec_folder + "rxn_gene.tsv")
        if rxn_rec_data:
            if verbose:
                print("\t\tCreating rxn_reconstructionData.tsv")
            rxn_rec_file(rxn_rec_data, padmetSpec_folder + "rxn_reconstructionData.tsv")
def main():
    """Draw one pathway of padmetRef, highlighting the network's coverage.

    Reactions present in the padmetSpec network are colored light green,
    missing reactions red, compounds sky blue. The figure is displayed with
    matplotlib (not saved).
    """
    args = docopt.docopt(__doc__)
    padmet_ref = PadmetRef(args["--padmetRef"])
    padmet = PadmetSpec(args["--padmetSpec"])
    pathway_id = args["--pathway"]
    # get all reactions in pathway
    try:
        all_reactions = [rlt.id_in for rlt in padmet_ref.dicOfRelationOut.get(pathway_id, None)
                         if rlt.type == "is_in_pathway"]
    except TypeError:
        # .get() returned None: the pathway id is unknown in padmetRef.
        print("%s not in padmetRef" %pathway_id)
        exit()
    reactions_in_network = []
    for reaction_id in all_reactions:
        if reaction_id in padmet.dicOfNode:
            reactions_in_network.append(reaction_id)

    DG = nx.DiGraph()
    custom_node_color = {}
    for reaction_id in all_reactions:
        # Reaction colors
        if reaction_id in reactions_in_network:
            custom_node_color[reaction_id] = "lightgreen"
        else:
            custom_node_color[reaction_id] = "red"
        # Reactants & products for each reaction
        reactants = [rlt.id_out for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, None)
                     if rlt.type == "consumes"]
        products = [rlt.id_out for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, None)
                    if rlt.type == "produces"]
        for reac in reactants:
            custom_node_color[reac] = "skyblue"
            DG.add_edge(reac, reaction_id)
        for prod in products:
            custom_node_color[prod] = "skyblue"
            DG.add_edge(reaction_id, prod)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz
    # BUGFIX: the draw_networkx keyword is `nodelist`, not `node_list`; with
    # the wrong name the node/color ordering was never applied.
    nx.draw_networkx(DG,
                     pos=graphviz_layout(DG, prog='neato'),  # Layout from graphviz
                     node_size=1600,
                     arrows=True,
                     font_size=11,        # font-size for labels
                     node_shape='s',      # shape of nodes
                     alpha=0.6,           # node & edge transparency
                     width=1.5,           # line width for edges
                     nodelist=list(custom_node_color.keys()),
                     node_color=list(custom_node_color.values()))
    plt.axis('off')
    plt.show()
def pathway_production(padmet_path, output, verbose=None, number_cpu=None, padmet_ref_path=None, pathway_completion_ratio=None):
    """
    Create two files degradation_matrix.tsv and biosynthesis_matrix.tsv.
    These files have metabolite as row and organism as column.
    It shows the input (degradation_matrix.tsv) and output (biosynthesis_matrix.tsv) of pathways in the organism.

    Parameters
    ----------
    padmet_path: str
        pathname of the padmet files, sep all files by ',', ex: /path/padmet1.padmet,/path/padmet2.padmet OR a folder
    output: str
        pathname of the output folder
    verbose: bool
        if True print information
    number_cpu: str or int
        Number of CPU
    padmet_ref_path: str
        pathname of the padmetRef file; required when pathway_completion_ratio is used
    pathway_completion_ratio: str or float
        minimal pathway completion ratio, a value between 0 and 1
    """
    if not os.path.exists(output):
        if verbose:
            print("Creating %s" % output)
        os.makedirs(output)
    else:
        if verbose:
            print("%s already exist, old comparison output folders will be overwritten" % output)

    # Collect the padmet files: every file in the folder, or a comma list.
    if os.path.isdir(padmet_path):
        all_files = [os.path.join(padmet_path, f) for f in next(os.walk(padmet_path))[2]]
    else:
        all_files = padmet_path.split(",")

    if number_cpu:
        try:
            number_cpu_to_use = int(number_cpu)
        except ValueError:
            raise ValueError('The number of CPU must be an integer.')
    else:
        number_cpu_to_use = 1
    # NOTE(review): number_cpu_to_use is validated but never used below —
    # parallelisation looks unimplemented in this function; confirm intent.

    if padmet_ref_path is None and pathway_completion_ratio is not None:
        sys.exit('pathway_completion_ratio option needs a padmetRef to compute the pathway completness ratio.')
    if pathway_completion_ratio is not None:
        try:
            pathway_completion_ratio = float(pathway_completion_ratio)
        except ValueError:
            sys.exit('pathway_completion_ratio must be a float')
        if pathway_completion_ratio < 0 or pathway_completion_ratio > 1:
            # BUGFIX: old message described the invalid range ("must be < 0 and > 1").
            sys.exit('pathway_completion_ratio must be >= 0 and <= 1')

    # Map each reference pathway to the set of its reactions.
    if padmet_ref_path:
        padmet_ref_pathways = {}
        padmetRef = PadmetRef(padmet_ref_path)
        all_pwys = [node for node in list(padmetRef.dicOfNode.values()) if node.type == 'pathway']
        for pwy in all_pwys:
            all_rxns = set([rlt.id_in for rlt in padmetRef.dicOfRelationOut.get(pwy.id, [])
                            if rlt.type == "is_in_pathway"])
            padmet_ref_pathways[pwy.id] = all_rxns
    else:
        padmet_ref_pathways = None

    # Gather pathway inputs/outputs per organism.
    all_metabolites = []
    all_pathways = {}
    organisms = []
    for padmet_file_path in all_files:
        padmet_id = os.path.splitext(os.path.basename(padmet_file_path))[0]
        pathway_inputs, pathways_outputs = extract_pahways_inputs_outputs(
            padmet_file_path, padmet_ref_pathways, pathway_completion_ratio)
        all_pathways[padmet_id] = pathway_inputs, pathways_outputs
        all_metabolites.extend(pathway_inputs.keys())
        all_metabolites.extend(pathways_outputs.keys())
        organisms.append(padmet_id)
    all_metabolites = set(all_metabolites)

    # One row per metabolite; each cell lists the pathways (comma separated)
    # degrading (index 0) or producing (index 1) it in that organism.
    degradation_matrix = []
    biosynthesis_matrix = []
    for metabolite in all_metabolites:
        degradation_matrix.append([metabolite] + [
            ','.join(all_pathways[organism][0][metabolite])
            if metabolite in all_pathways[organism][0] else ''
            for organism in organisms
        ])
        biosynthesis_matrix.append([metabolite] + [
            ','.join(all_pathways[organism][1][metabolite])
            if metabolite in all_pathways[organism][1] else ''
            for organism in organisms
        ])

    degradation_file = os.path.join(output, 'degradation_matrix.tsv')
    with open(degradation_file, 'w') as degradation_output_file:
        csvwriter = csv.writer(degradation_output_file, delimiter='\t')
        # BUGFIX: header typo 'Metaboltie' -> 'Metabolite'.
        csvwriter.writerow(['Metabolite', *organisms])
        for row in degradation_matrix:
            # Skip metabolites with no pathway in any organism.
            if ''.join(row[1:]) != '':
                csvwriter.writerow([*row])

    biosynthesis_file = os.path.join(output, 'biosynthesis_matrix.tsv')
    with open(biosynthesis_file, 'w') as biosynthesis_output_file:
        csvwriter = csv.writer(biosynthesis_output_file, delimiter='\t')
        csvwriter.writerow(['Metabolite', *organisms])
        for row in biosynthesis_matrix:
            if ''.join(row[1:]) != '':
                csvwriter.writerow([*row])
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME'
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true, remove reactions without genes associated
    enhanced_db: bool
        if true, read metabolix-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information

    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb data
    """
    # Module-level state shared with the *_parser helpers: they read these
    # regexes/compartments and append Relation objects to list_of_relation.
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    regex_purge = re.compile("<.*?>|\|")
    regex_xref = re.compile('^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    list_of_relation = []
    # Default compartments: "c" (inside) and "e" (outside).
    def_compart_in = "c"
    def_compart_out = "e"
    #parsing args
    source = source.upper()
    # Expected attribute-value .dat files inside the PGDB folder.
    classes_file, compounds_file, proteins_file, reactions_file, enzrxns_file, pathways_file = \
    [os.path.join(pgdb_folder,_file) for _file in ["classes.dat", "compounds.dat", "proteins.dat", "reactions.dat", "enzrxns.dat", "pathways.dat"]]
    if enhanced_db:
        metabolic_reactions = os.path.join(pgdb_folder,"metabolic-reactions.xml")
    else:
        metabolic_reactions = None
    if extract_gene:
        genes_file = os.path.join(pgdb_folder,"genes.dat")
    else:
        genes_file = None
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    if padmetRef_file:
        # With a padmetRef: build a PadmetSpec by copying each reaction listed
        # in reactions.dat from the reference database.
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        # db/version arguments are overridden by the reference's metadata.
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)
        padmet.setPolicy(padmetRef)
        # Reaction ids are the values of the "UNIQUE-ID - <id>" lines.
        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines() if line.startswith("UNIQUE-ID")]
        count = 0
        for rxn_id in rxns_id:
            count += 1
            if verbose:
                print("%s/%s Copy %s" %(count, len(rxns_id), rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                # Attach a reconstructionData node tracing where the reaction came from.
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if reconstructionData_id in list(padmet.dicOfNode.keys()) and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)
            except TypeError:
                # copyNode raised: the reaction id is unknown in padmetRef.
                print("%s not in padmetRef" %(rxn_id))
        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)
    else:
        # Without a padmetRef: build a standalone PadmetRef by parsing every
        # .dat file of the PGDB with the default relation policy.
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                           ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                           ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                           ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                           ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'], ['protein','is_in_species','class'],
                           ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                           ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                           ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)
        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)
        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)
        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)
        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)
        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)
        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)
    # Index every relation accumulated by the parsers into the padmet's
    # in/out relation dictionaries.
    for rlt in list_of_relation:
        try:
            padmet.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmet.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmet.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmet.dicOfRelationOut[rlt.id_out] = [rlt]
    if extract_gene and no_orphan:
        # Remove reactions with no is_linked_to relation (no gene association).
        all_reactions = [node for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
        rxn_to_del = [r for r in all_reactions if not any([rlt for rlt in padmet.dicOfRelationIn[r.id] if rlt.type == "is_linked_to"])]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose:
            print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))
        # Then remove genes no longer linked to any reaction.
        all_genes_linked = set([rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"])
        all_genes = set([node.id for node in list(padmet.dicOfNode.values()) if node.type == "gene"])
        count = 0
        for gene_id in [g for g in all_genes if g not in all_genes_linked]:
            count += 1
            #if verbose: print("Removing gene without gene assoc %s" %gene_id)
            # NOTE(review): removes only the node (dict pop), unlike delNode
            # used for reactions above — presumably intentional since orphan
            # genes have no relations left; confirm.
            padmet.dicOfNode.pop(gene_id)
        if verbose:
            print("%s/%s orphan genes (not linked to any reactions) deleted" %(count, len(all_genes)))
    # Drop reactions that only consume or only produce (suspected transport).
    rxns = [node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = set([rlt.type for rlt in padmet.dicOfRelationIn[rxn_id] if rlt.type in ["consumes","produces"]])
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)
    return padmet
def visu_path_compounds(padmet_pathname, padmet_ref_pathname, pathway_ids,
                        output_file, hide_currency_metabolites=None):
    """ Extract reactions from pathway and create a compound/reaction graph.

    Reactions found in the network are drawn in green, missing ones in red,
    compounds in blue. The figure is written with matplotlib/graphviz.

    Parameters
    ----------
    padmet_pathname: str
        pathname of the padmet file or a folder containing multiple padmet
    padmet_ref_pathname: str
        pathname of the padmetRef file
    pathway_ids: str
        name of the pathway (can be multiple pathways separated by a ',')
    output_file: str
        pathname of the output picture (extension can be .png or .svg)
    hide_currency_metabolites: bool
        hide currency metabolites
    """
    if os.path.isfile(padmet_pathname):
        padmet = PadmetSpec(padmet_pathname)
    else:
        padmet = padmet_to_padmet.padmet_to_padmet(padmet_pathname)

    padmet_ref = PadmetRef(padmet_ref_pathname)

    pathway_ids = pathway_ids.split(',')

    pwy_all_reactions = []

    if hide_currency_metabolites:
        compounds_to_hide = [
            "PROTON", "WATER", "OXYGEN-MOLECULE", "NADP", "NADPH", "ATP",
            "PPI", "CARBON-DIOXIDE", "Pi", "ADP", "CO-A", "UDP", "NAD",
            "NADH", "AMP", "AMMONIA", "HYDROGEN-PEROXIDE", "Acceptor",
            "Donor-H2", "3-5-ADP", "GDP", "CARBON-MONOXIDE", "GTP", "FAD"
        ]
    else:
        compounds_to_hide = []

    def get_reactions(pathway_id, padmet_ref, pwy_all_reactions):
        # Recursively collect the reaction ids of a pathway, descending into
        # sub-pathways. BUGFIX: use [] (not None) as the .get() default so a
        # pathway id with no relations yields no reactions instead of raising
        # "TypeError: 'NoneType' object is not iterable".
        all_reactions = [
            rlt.id_in
            for rlt in padmet_ref.dicOfRelationOut.get(pathway_id, [])
            if rlt.type == "is_in_pathway"
        ]
        for reaction_id in all_reactions:
            if reaction_id in padmet_ref.dicOfNode:
                node_reaction = padmet_ref.dicOfNode[reaction_id]
                if node_reaction.type == "pathway":
                    # Super-pathway member: recurse.
                    pwy_all_reactions = get_reactions(
                        node_reaction.id, padmet_ref, pwy_all_reactions)
                else:
                    if reaction_id not in pwy_all_reactions:
                        pwy_all_reactions.append(reaction_id)

        return pwy_all_reactions

    for pathway_id in pathway_ids:
        if pathway_id in padmet_ref.dicOfNode:
            tmp_pwy_all_reactions = []
            tmp_pwy_all_reactions = get_reactions(pathway_id, padmet_ref,
                                                  tmp_pwy_all_reactions)
            pwy_all_reactions.extend(tmp_pwy_all_reactions)
        else:
            print("Pathway " + pathway_id + " not in PadmetRef " +
                  padmet_ref_pathname)

    # Reactions of the pathway(s) that are present in the network.
    reactions_in_network = []
    for reaction_id in pwy_all_reactions:
        if reaction_id in padmet.dicOfNode:
            reactions_in_network.append(reaction_id)

    DG = nx.DiGraph()

    custom_node_color = OrderedDict()
    for reaction_id in pwy_all_reactions:
        # Reaction colors: green when present in the network, red otherwise.
        if reaction_id in reactions_in_network:
            custom_node_color[reaction_id] = "lightgreen"
        else:
            custom_node_color[reaction_id] = "red"

        # Reactants & products for each reaction.
        # BUGFIX: default to [] instead of None (see get_reactions above).
        reactants = [
            rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, [])
            if rlt.type == "consumes"
        ]
        products = [
            rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, [])
            if rlt.type == "produces"
        ]
        for reac in reactants:
            if reac not in compounds_to_hide:
                if reac not in custom_node_color:
                    custom_node_color[reac] = "skyblue"
                DG.add_edge(reac, reaction_id)
                # Reversible reactions get edges in both directions.
                if 'REVERSIBLE' in padmet_ref.dicOfNode[reaction_id].misc[
                        'DIRECTION']:
                    DG.add_edge(reaction_id, reac)
        for prod in products:
            if prod not in compounds_to_hide:
                if prod not in custom_node_color:
                    custom_node_color[prod] = "skyblue"
                DG.add_edge(reaction_id, prod)
                if 'REVERSIBLE' in padmet_ref.dicOfNode[reaction_id].misc[
                        'DIRECTION']:
                    DG.add_edge(prod, reaction_id)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz
    nx.draw_networkx(
        DG,
        pos=graphviz_layout(DG, prog='neato'),  # Layout from graphviz
        node_size=1600,
        arrows=True,
        font_size=11,  # font-size for labels
        node_shape='s',  # shape of nodes
        alpha=0.6,  # node & edge transparency
        width=1.5,  # line width for edges
        nodelist=list(custom_node_color.keys()),
        node_color=[
            custom_node_color[node] for node in list(custom_node_color.keys())
        ])

    plt.axis('off')
    plt.savefig(output_file, bbox_inches='tight')
    plt.clf()
def visu_path_pathways(padmet_pathname, padmet_ref_pathname, pathway_ids,
                       output_file):
    """ Extract sub-pathways from pathway and create a pathway/compound graph.

    Requires INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway node.misc of
    both the padmet(s) and the padmetRef; exits otherwise.

    Parameters
    ----------
    padmet_pathname: str
        pathname of the padmet file or a folder containing multiple padmet
    padmet_ref_pathname: str
        pathname of the padmetRef file
    pathway_ids: str
        name of the pathway (can be multiple pathways separated by a ',')
    output_file: str
        pathname of the output picture (extension can be .png or .svg)
    """
    if os.path.isfile(padmet_pathname):
        padmet = PadmetSpec(padmet_pathname)
    else:
        padmet = padmet_to_padmet.padmet_to_padmet(padmet_pathname)

    padmet_ref = PadmetRef(padmet_ref_pathname)

    # Check if the padmets and padmetref contain the INPUT-COMPOUNDS and
    # OUTPUT-COMPOUNDS in pathway node.misc needed for this analysis.
    padmetref_input_compounds_in_pwys = [
        1 for node_pathway in padmet_ref.dicOfNode
        if padmet_ref.dicOfNode[node_pathway].type == 'pathway'
        and 'INPUT-COMPOUNDS' in padmet_ref.dicOfNode[node_pathway].misc
    ]
    padmetref_output_compounds_in_pwys = [
        1 for node_pathway in padmet_ref.dicOfNode
        if padmet_ref.dicOfNode[node_pathway].type == 'pathway'
        and 'OUTPUT-COMPOUNDS' in padmet_ref.dicOfNode[node_pathway].misc
    ]
    if sum(padmetref_input_compounds_in_pwys) == 0 or sum(
            padmetref_output_compounds_in_pwys) == 0:
        sys.exit(
            "The padmetref " + padmet_ref_pathname +
            " does not contain INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway node, can't produce the pathway visualization."
        )

    padmet_input_compounds_in_pwys = [
        1 for node_pathway in padmet.dicOfNode
        if padmet.dicOfNode[node_pathway].type == 'pathway'
        and 'INPUT-COMPOUNDS' in padmet.dicOfNode[node_pathway].misc
    ]
    padmet_output_compounds_in_pwys = [
        1 for node_pathway in padmet.dicOfNode
        if padmet.dicOfNode[node_pathway].type == 'pathway'
        and 'OUTPUT-COMPOUNDS' in padmet.dicOfNode[node_pathway].misc
    ]
    if sum(padmet_input_compounds_in_pwys) == 0 or sum(
            padmet_output_compounds_in_pwys) == 0:
        sys.exit(
            "The padmet " + padmet_pathname +
            " does not contain INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway node, can't produce the pathway visualization."
        )

    # Extract pathway from superpathways.
    pathway_ids = pathway_ids.split(',')

    all_pathways = []

    def get_pathways(pathway_id, padmet_ref, pwy_all_reactions):
        # Recursively collect sub-pathway ids of a (super-)pathway.
        # BUGFIX: use [] (not None) as the .get() default so a pathway id
        # with no relations yields nothing instead of raising
        # "TypeError: 'NoneType' object is not iterable".
        all_reactions_pathways = [
            rlt.id_in
            for rlt in padmet_ref.dicOfRelationOut.get(pathway_id, [])
            if rlt.type == "is_in_pathway"
        ]
        for reaction_pathway_id in all_reactions_pathways:
            if reaction_pathway_id in padmet_ref.dicOfNode:
                node_reaction = padmet_ref.dicOfNode[reaction_pathway_id]
                if node_reaction.type == "pathway":
                    pwy_all_reactions.append(reaction_pathway_id)
                    pwy_all_reactions = get_pathways(
                        node_reaction.id, padmet_ref, pwy_all_reactions)

        return pwy_all_reactions

    for pathway_id in pathway_ids:
        if pathway_id in padmet_ref.dicOfNode:
            tmp_pwy_all_pathways = []
            tmp_pwy_all_pathways = get_pathways(pathway_id, padmet_ref,
                                                tmp_pwy_all_pathways)
            all_pathways.extend(tmp_pwy_all_pathways)
        else:
            print("Pathway " + pathway_id + " not in PadmetRef " +
                  padmet_ref_pathname)

    # Find pathway in the padmet file.
    pathways_in_network = []
    for pathway_id in all_pathways:
        if pathway_id in padmet.dicOfNode:
            pathways_in_network.append(pathway_id)

    # Create the graph.
    DG = nx.DiGraph()

    custom_node_color = OrderedDict()
    for pwy in all_pathways:
        node_pathway = padmet_ref.dicOfNode[pwy]
        # Pathway colors: green when present in the network, red otherwise.
        if pwy in pathways_in_network:
            custom_node_color[pwy] = "lightgreen"
        else:
            custom_node_color[pwy] = "red"
        if 'INPUT-COMPOUNDS' in node_pathway.misc and 'OUTPUT-COMPOUNDS' in node_pathway.misc:
            # misc values are lists of one comma-separated string.
            for reactant in node_pathway.misc['INPUT-COMPOUNDS'][0].split(','):
                if reactant not in custom_node_color:
                    custom_node_color[reactant] = "skyblue"
                DG.add_edge(reactant, pwy)
            for product in node_pathway.misc['OUTPUT-COMPOUNDS'][0].split(','):
                if product not in custom_node_color:
                    custom_node_color[product] = "skyblue"
                DG.add_edge(pwy, product)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz
    nx.draw_networkx(
        DG,
        pos=graphviz_layout(DG, prog='neato'),  # Layout from graphviz
        node_size=1600,
        arrows=True,
        font_size=11,  # font-size for labels
        node_shape='s',  # shape of nodes
        alpha=0.6,  # node & edge transparency
        width=1.5,  # line width for edges
        nodelist=list(custom_node_color.keys()),
        node_color=[
            custom_node_color[node] for node in list(custom_node_color.keys())
        ])

    plt.axis('off')
    plt.savefig(output_file, bbox_inches='tight')
    plt.clf()
def reaction_figure_creation(reaction_file, output_folder, upset_cluster=None,
                             padmetRef_file=None, pvclust=None, verbose=False):
    """ Create dendrogram, upset figure (if upset argument) and compare reactions in species.

    Parameters
    ----------
    reaction_file: str
        path to reaction file (reactions.tsv)
    output_folder: str
        path to output folder
    upset_cluster: int
        the number of cluster you want in the supervenn/upset figure
    padmetRef_file: str
        path to padmet ref file (used to map reactions to EC numbers)
    pvclust: bool
        boolean to launch or not R pvclust dendrogram
    verbose: bool
        if True print information
    """
    # Check if output folders exist, if not create them (order matters:
    # parents are created before their children).
    output_folder_tree_cluster = output_folder + '/tree_cluster/'
    output_folder_comparison = output_folder + '/tree_cluster/comparison_cluster/'
    output_folder_specific = output_folder_tree_cluster + 'specific_reactions/'
    output_folder_absent = output_folder_tree_cluster + 'absent_reactions/'

    if upset_cluster:
        output_folder_upset = output_folder + '/upset_graph'
        temp_data_folder = output_folder + '/upset_graph/temp_data/'
        folders = [
            output_folder, output_folder_tree_cluster,
            output_folder_comparison, output_folder_specific,
            output_folder_absent, output_folder_upset, temp_data_folder
        ]
    else:
        folders = [
            output_folder, output_folder_tree_cluster,
            output_folder_comparison, output_folder_specific,
            output_folder_absent
        ]

    for folder in folders:
        if not os.path.isdir(folder):
            os.mkdir(folder)

    if not os.path.exists(reaction_file):
        raise FileNotFoundError("No reactions.tsv file accessible at " +
                                reaction_file)

    # Read the reactions file with pandas.
    all_reactions_dataframe = pa.read_csv(reaction_file, sep='\t')

    # Keep column containing absence-presence of reactions.
    # (columns with (sep=;) are column with gene name linked to reactions)
    # (columns with _formula contain the reaction formula)
    columns = [
        column for column in all_reactions_dataframe.columns
        if '(sep=;)' not in column
    ]
    columns = [column for column in columns if '_formula' not in column]
    reactions_dataframe = all_reactions_dataframe[columns].copy()
    reactions_dataframe.set_index('reaction', inplace=True)

    # Transpose the matrix to have species as index and reactions as columns.
    absence_presence_matrix = reactions_dataframe.transpose()

    # Compute a distance matrix using the Jaccard distance between species
    # and condense it.
    condensed_distance_matrix_jaccard = pdist(absence_presence_matrix,
                                              metric='jaccard')

    # Hierarchical clustering on the condensed distance matrix.
    linkage_matrix = linkage(condensed_distance_matrix_jaccard,
                             method='average',
                             metric='jaccard')

    # Draw a dendrogram of the clustering.
    reaction_dendrogram = dendrogram(linkage_matrix,
                                     labels=absence_presence_matrix.index,
                                     leaf_font_size=100,
                                     leaf_rotation=90)

    # Extract organisms.
    organisms = absence_presence_matrix.index.tolist()

    # Create Newick tree.
    tree = to_tree(linkage_matrix, False)
    newick_tree = getNewick(tree, "", tree.dist, organisms)
    newick_path = os.path.join(output_folder, 'newick.txt')
    with open(newick_path, 'w') as f:
        f.write(newick_tree)

    # Specific reactions for each species.
    absent_and_specific_reactions(reactions_dataframe,
                                  output_folder_tree_cluster,
                                  output_folder_specific,
                                  output_folder_absent, organisms)

    if pvclust:
        pvclust_reactions_dataframe = all_reactions_dataframe[columns].copy()
        pvclust_reactions_dataframe.set_index('reaction', inplace=True)
        # Create pvclust dendrogram.
        pvclust_dendrogram(pvclust_reactions_dataframe, organisms,
                           output_folder)

    # Extract all the nodes inside the clustering.
    _, node_list = to_tree(linkage_matrix, rd=True)

    if padmetRef_file:
        padmet_ref = PadmetRef(padmetRef_file)
        # Map each reaction id to its EC number(s) when available.
        metacyc_to_ecs = {
            node.id: node.misc['EC-NUMBER']
            for node in padmet_ref.dicOfNode.values()
            if node.type == "reaction" and 'EC-NUMBER' in node.misc
        }
    else:
        metacyc_to_ecs = {}

    # For each cluster, give the list of organisms in it.
    # Then write it in a file.
    len_longest_cluster_id = len(str(max([node.id for node in node_list])))

    cluster_leaf_species = {}
    for node in node_list:
        node_leafs = node.pre_order(lambda child: organisms[child.id]
                                    if child.is_leaf() else None)
        cluster_leaf_species[
            'cluster_' + str(node.id).zfill(len_longest_cluster_id)] = node_leafs

    species_clustered_df = pa.DataFrame(columns=organisms)
    for cluster_leaf in cluster_leaf_species:
        tmp_organism_cluster = [
            True if organism in cluster_leaf_species[cluster_leaf] else False
            for organism in species_clustered_df.columns
        ]
        species_clustered_df.loc[cluster_leaf] = tmp_organism_cluster

    species_clustered_df = species_clustered_df.replace(np.nan, False)
    species_clustered_df.to_csv(output_folder_tree_cluster +
                                'clustered_species.tsv',
                                sep='\t')

    # Create xml structure from hierarchical clustering.
    root = hclust_to_xml(linkage_matrix)

    # Post order traversal of the tree: map each cluster tag to its children.
    d = {}
    for element in root.iter():
        d[element.tag] = [child.tag for child in element]

    post_order_clusters = {}
    for node in node_list:
        node_label = 'cluster_' + str(node.id).zfill(len_longest_cluster_id)
        if d[node_label] == []:
            # Leaf cluster: reactions shared by all its species.
            species = cluster_leaf_species[node_label]
            tmp_reactions = reactions_dataframe[
                reactions_dataframe[species].all(1) == True]
            post_order_clusters[node_label] = tmp_reactions.index.tolist()
        else:
            # Internal cluster: intersection of the two children when it is
            # non-empty, otherwise their union.
            if set(post_order_clusters[d[node_label][0]]).intersection(
                    set(post_order_clusters[d[node_label][1]])) != set():
                post_order_clusters[node_label] = set(
                    post_order_clusters[d[node_label][0]]).intersection(
                        set(post_order_clusters[d[node_label][1]]))
            else:
                post_order_clusters[node_label] = set(
                    post_order_clusters[d[node_label][0]]).union(
                        set(post_order_clusters[d[node_label][1]]))

    # Use xml structure to create intersection files.
    reactions_clust = create_intersection_files(root, cluster_leaf_species,
                                                reactions_dataframe,
                                                output_folder_tree_cluster,
                                                metacyc_to_ecs)

    comparison_cluster(reactions_clust, output_folder_comparison)

    # Add label containing cluster name and reaction number to each node.
    check_label = add_dendrogram_node_label(reaction_dendrogram, node_list,
                                            reactions_clust,
                                            len_longest_cluster_id)
    if not check_label:
        print('Warning: no label for cluster name have been added.')

    # Create dendrogram, bbox option adjust the figure size.
    plt.savefig(output_folder + '/reaction_dendrogram.png',
                bbox_inches='tight')
    plt.clf()

    if upset_cluster:
        dendrogram_fclusters = create_cluster(reactions_dataframe,
                                              absence_presence_matrix,
                                              linkage_matrix)
        # BUGFIX: the original passed an undefined name `k` here, raising a
        # NameError whenever upset_cluster was requested; the intended value
        # is the `upset_cluster` parameter.
        create_supervenn(absence_presence_matrix, reactions_dataframe,
                         output_folder_upset, dendrogram_fclusters,
                         upset_cluster, verbose)
parser.add_argument("-s", "--species", help="padmet species", required=True) parser.add_argument("-p", "--pathway", help="pathway name", required=True) parser.add_argument("-o", "--output", help="SBML output filename", required=False) args = parser.parse_args() if args.output: outfile = args.output else: outfile = args.pathway.lower() + ".sbml" p_ref = PadmetRef(args.reference) #reads reference padmet p_spec = PadmetSpec(args.species) #reads organism padmet rxn_list = [ rlt.id_in for rlt in p_ref.dicOfRelationOut[args.pathway] if rlt.type == "is_in_pathway" and p_ref.dicOfNode[rlt.id_in].type == "reaction" ] for rxn_id in rxn_list: print(rxn_id) p_spec.copyNode(p_ref, rxn_id) padmet_to_sbml(p_spec, outfile, sbml_lvl=2, verbose=True)
def biggAPI_to_padmet(output, pwy_file=None, verbose=False):
    """ Extract BIGG database using the api. Create a padmet file.
    Escape reactions of biomass.
    Require internet access !

    Allows to extract the bigg database from the API to create a padmet.

    1./ Get all reactions universal id from http://bigg.ucsd.edu/api/v2/universal/reactions, escape reactions of biomass.
    2./ Extract all the information for each reaction (compounds, stoichiometry, name ...)
    3./ Because the direction of the reaction is not set by default in bigg,
    get all the models where the reaction is present; the final direction is
    "not reversible" when at least 75% of those models have a lower bound of 0.
    4./ Also extract xrefs.

    Parameters
    ----------
    output: str
        path to output, the padmet file.
    pwy_file: str
        path to pathway file, add kegg pathways, line:'pwy_id, pwy_name, x, rxn_id'.
    verbose: bool
        if True print information
    """
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")

    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y']
    ]
    dbNotes = {
        "PADMET": {
            "Creation": today_date,
            "version": "2.6"
        },
        "DB_info": {
            "DB": "BIGG",
            "version": "1.5"
        }
    }
    padmetRef = PadmetRef()
    if verbose:
        print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose:
        print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []

    if verbose:
        print("Getting all reactions ids")
    url_bigg = 'http://bigg.ucsd.edu/api/v2/'
    raw_data = requests.get(url_bigg + "universal/reactions").json()['results']
    all_reactions_ids = [
        rxn_dict['bigg_id'] for rxn_dict in raw_data
        if not rxn_dict['bigg_id'].startswith("BIOMASS")
    ]
    if verbose:
        print("%s reactions to extract" % (len(all_reactions_ids)))

    if verbose:
        print("updating padmet")
    count = 0
    # BUGFIX: the original filtered with `'biomass' not in i.upper()`, which
    # can never match (i.upper() contains no lowercase letters) and so kept
    # every biomass reaction. Compare against the uppercase literal instead.
    all_reactions_ids = [
        i for i in all_reactions_ids if 'BIOMASS' not in i.upper()
    ]
    for rxn_id in all_reactions_ids:
        count += 1
        if verbose:
            print("reaction: %s, %s/%s" %
                  (rxn_id, count, len(all_reactions_ids)))

        if rxn_id not in padmetRef.dicOfNode:
            rxn_response = requests.get(url_bigg + "universal/reactions/" +
                                        rxn_id)
            rxn_dict = rxn_response.json()
            rxn_metabolites = rxn_dict["metabolites"]
            # Skip degenerate reactions with a single metabolite.
            if len(rxn_metabolites) > 1:
                rxn_id = rxn_dict['bigg_id']
                rxn_name = rxn_dict["name"]

                # Fetch the reaction in every model containing it to decide
                # on its direction.
                all_models_id = [
                    i["bigg_id"]
                    for i in rxn_dict["models_containing_reaction"]
                ]
                async_list = []
                for model_id in all_models_id:
                    action_item = grequests.get(url_bigg + "models/" +
                                                model_id + "/reactions/" +
                                                rxn_id)
                    async_list.append(action_item)
                models_responses = [
                    r.json() for r in grequests.map(async_list)
                ]
                all_lower_bound = [
                    i["results"][0]["lower_bound"] for i in models_responses
                ]
                # Fraction of models where the reaction is not reversible
                # (lower bound of 0).
                ratio_not_rev = float(all_lower_bound.count(0)) / float(
                    len(all_lower_bound))
                if verbose:
                    print("Reaction not reversible in %s/%s model(s)" %
                          (all_lower_bound.count(0), len(all_lower_bound)))
                if ratio_not_rev >= 0.75:
                    rxn_direction = "LEFT-TO-RIGHT"
                    if verbose:
                        print("Reaction not reversible")
                else:
                    rxn_direction = "REVERSIBLE"
                    if verbose:
                        print("Reaction reversible")
                padmetRef.createNode("reaction", rxn_id, {
                    "COMMON_NAME": [rxn_name],
                    "DIRECTION": [rxn_direction]
                })

                # Cross-references of the reaction.
                rxn_xrefs = rxn_dict["database_links"]
                xref_id = rxn_id + "_xrefs"
                xref_node = padmetRef.createNode("xref", xref_id)
                has_xref_rlt = Relation(rxn_id, "has_xref", xref_id)
                list_of_relation.append(has_xref_rlt)
                for db, k in list(rxn_xrefs.items()):
                    _id = k[0]["id"]
                    if db in list(xref_node.misc.keys()
                                  ) and _id not in xref_node.misc[db]:
                        xref_node.misc[db].append(_id)
                    else:
                        xref_node.misc[db] = [_id]

                # Metabolites: create compound nodes and consume/produce
                # relations (negative stoichiometry means reactant).
                for metabo_dict in rxn_metabolites:
                    metabo_id = metabo_dict["bigg_id"]
                    metabo_name = metabo_dict["name"]
                    metabo_compart = metabo_dict["compartment_bigg_id"]
                    metabo_stoich = metabo_dict["stoichiometry"]
                    if metabo_id not in padmetRef.dicOfNode:
                        padmetRef.createNode("compound", metabo_id,
                                             {"COMMON_NAME": [metabo_name]})
                    if metabo_stoich < 0:
                        consumes_rlt = Relation(
                            rxn_id, "consumes", metabo_id, {
                                "STOICHIOMETRY": [abs(metabo_stoich)],
                                "COMPARTMENT": [metabo_compart]
                            })
                        list_of_relation.append(consumes_rlt)
                    else:
                        produces_rlt = Relation(
                            rxn_id, "produces", metabo_id, {
                                "STOICHIOMETRY": [abs(metabo_stoich)],
                                "COMPARTMENT": [metabo_compart]
                            })
                        list_of_relation.append(produces_rlt)
        else:
            if verbose:
                print("%s already in padmet" % rxn_id)
            continue

    if verbose:
        print("Adding all relations")
    count = 0
    for rlt in list_of_relation:
        count += 1
        if verbose:
            print("relation %s/%s" % (count, len(list_of_relation)))
        try:
            padmetRef.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmetRef.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationOut[rlt.id_out] = [rlt]

    if pwy_file:
        add_kegg_pwy(pwy_file, padmetRef, verbose)
    if verbose:
        print("Generating file: %s" % output)
    padmetRef.generateFile(output)
def analysis_on_group(group_name, groups, config_data, pvclust, nb_cpu_to_use, verbose):
    """Create reaction dendrogram and extract specific reactions using metabolic networks.

    Args:
        group_name (str): Name of the group from group_template.tsv.
        groups (list): All the species inside the group.
        config_data (dict): Dictionary with all configuration paths.
        pvclust (boolean): use also pvclust to create reaction dendrogram
        nb_cpu_to_use (int): number of CPU for multiprocessing
        verbose (bool): Verbose.
    """
    database_path = config_data['database_path']
    padmetRef = PadmetRef(database_path)
    padmet_from_networks_path = config_data['padmet_from_networks_path']
    analysis_path = config_data['analysis_path']

    all_padmet_path = [
        os.path.join(padmet_from_networks_path, name + ".padmet")
        for name in groups
    ]

    # Consistency fix: build every path with os.path.join instead of mixing
    # join and '/'-concatenation.
    group_analysis_path = os.path.join(analysis_path, group_name)
    if not os.path.isdir(group_analysis_path):
        if len(groups) == 1:
            sys.exit('A group must contain more than one member.')

        # Fail early if one of the group's padmet files is missing.
        for padmet_path in all_padmet_path:
            if not os.path.exists(padmet_path):
                org_name = os.path.splitext(os.path.basename(padmet_path))[0]
                sys.exit(
                    "Padmet file of organism %s from group %s not found in %s"
                    % (org_name, group_name, padmet_from_networks_path))

        # Compare the padmet to create the reactions.tsv file needed to
        # create the reaction dendrogram.
        compare_padmet.compare_padmet(padmet_path=",".join(all_padmet_path),
                                      output=group_analysis_path,
                                      padmetRef=padmetRef,
                                      verbose=verbose,
                                      number_cpu=nb_cpu_to_use)

        # Merge the group's networks into one pan-metabolism padmet and SBML.
        panmetabolism_padmet = os.path.join(
            group_analysis_path, group_name + '_panmetabolism.padmet')
        panmetabolism_sbml = os.path.join(group_analysis_path,
                                          group_name + '_panmetabolism.sbml')
        padmet_to_padmet.padmet_to_padmet(",".join(all_padmet_path),
                                          panmetabolism_padmet)
        sbmlGenerator.padmet_to_sbml(padmet=panmetabolism_padmet,
                                     output=panmetabolism_sbml,
                                     verbose=verbose)

        dendrogram_reactions_distance.reaction_figure_creation(
            reaction_file=os.path.join(group_analysis_path, 'reactions.tsv'),
            output_folder=os.path.join(group_analysis_path,
                                       'dendrogram_output'),
            padmetRef_file=database_path,
            pvclust=pvclust,
            verbose=verbose)
    else:
        print(
            group_analysis_path +
            ' already exists. Delete it if you want to relaunch the analysis.')
def sbml_to_padmetSpec(sbml, padmetSpec_file, padmetRef_file=None, output=None,
                       mapping=None, mapping_tag="_dict.csv", source_tool=None,
                       source_category=None, db="NA", version="NA",
                       verbose=False):
    """ Convert 1 - n sbml to padmet file.

    sbml var is file or dir

    padmetSpec_file: path to new padmet file to create or old padmet to update
    padmetRef_file: path to database of reference to use for data standardization
    output: path to new padmet file, if none, overwrite padmetSpec_file
    source_tool: tool used to create this sbml(s) ex Orthofinder
    source_category: Category of the tool ex: orthology

    if new padmet without padmetRef:
        db: database used ex: metacyc, bigg
        version: version of the database, 23, 18...

    if padmetRef, not padmetSpec:
        if padmetRef exist, instance PadmetRef
        else init PadmetRef
        update padmetRef
    if padmetSpec:
        if padmetRef, check if exist else raise Error
        if padmetSpec exist, instance PadmetSpec
        else init PadmetSpec
        update padmetSpec using padmetRef if padmetRef

    #TODO
    """
    if verbose:
        print(
            'sbml_to_padmet decodes reactions and metabolites using regular expression.'
        )
        print(
            'The reaction/metabolites IDs format used by sbml_to_padmet is: prefix + "_" + ID + "_" + optional_suffix. '
        )
        print(
            'prefix is a one character indicating the type, like R for reaction or M for metabolite.'
        )
        print(
            'optional_suffix is a one or two characters indicating the compartment.'
        )
    if output is None:
        output = padmetSpec_file

    # if sbml is a dir: sbml_files are all files with extension .sbml or .xml
    # within dir; else: sbml = my_sbml.sbml or sbml = my_sbml1.sbml;my_sbml2.sml
    if os.path.isdir(sbml):
        sbml_files = [
            os.path.join(sbml, _f) for _f in next(os.walk(sbml))[2]
            if _f.endswith((".sbml", ".xml"))
        ]
    else:
        sbml_files = [sbml]

    # PadmetRef used for mapping and data standardization.
    if padmetRef_file:
        padmetRef = PadmetRef(padmetRef_file)
    else:
        padmetRef = None

    if os.path.isfile(padmetSpec_file):
        padmet_to_update = PadmetSpec(padmetSpec_file)
    else:
        padmet_id = os.path.splitext(os.path.basename(output))[0]
        padmet_to_update = instantiate_padmet("PadmetSpec", padmetRef_file,
                                              padmet_id, db, version, verbose)

    # if sbml is a directory, recover all file paths in a list; if not,
    # only one file: create a dict with only this file.
    # sbml_mapping_dict = {'/path/to/my_sbml1.sbml': '/path/to/my_sbml1_dict.csv' // None}
    sbml_mapping_dict = {}
    if os.path.isdir(sbml):
        for sbml_file in sbml_files:
            mapping_file = os.path.splitext(sbml_file)[0] + mapping_tag
            # Mapping is only usable with a padmetRef and an existing file.
            if not os.path.isfile(mapping_file) or not padmetRef:
                mapping_file = None
            sbml_mapping_dict[sbml_file] = mapping_file
    else:
        sbml_mapping_dict[sbml] = mapping

    if not sbml_mapping_dict:
        raise IOError("No sbml found based on %s" % sbml)

    for sbml_file, mapping_file in list(sbml_mapping_dict.items()):
        # Without a mapping dictionary, force the update.
        force = not mapping_file
        if verbose:
            if mapping_file:
                print("Updating %s from %s using mapping dictionnary %s" %
                      (os.path.basename(padmetSpec_file),
                       os.path.basename(sbml_file),
                       os.path.basename(mapping_file)))
            else:
                print("Updating %s from %s" %
                      (os.path.basename(padmetSpec_file),
                       os.path.basename(sbml_file)))

        padmet_to_update.updateFromSbml(sbml_file=sbml_file,
                                        padmetRef=padmetRef,
                                        mapping_file=mapping_file,
                                        verbose=verbose,
                                        force=force,
                                        source_category=source_category,
                                        source_tool=source_tool)

    padmet_to_update.generateFile(output)
def modelSeed_to_padmet(rxn_file, pwy_file, output, verbose=False):
    """
    Build a PadmetRef database file from ModelSEED reaction and pathway data.

    Parameters
    ----------
    rxn_file: str
        path to the ModelSEED reactions JSON file ({rxn_id: rxn_dict})
    pwy_file: str
        path to the ModelSEED pathways TSV file
        (columns used: "Source ID", "Name", "Aliases", "Reactions")
    output: str
        path to the padmet file to create
    verbose: bool
        if True, print progress information
    """
    global list_of_relation
    today_date = datetime.now().strftime("%Y-%m-%d")
    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y']
    ]
    dbNotes = {
        "PADMET": {"Creation": today_date, "version": "2.6"},
        "DB_info": {"DB": "MODELSEED", "version": "1.0"}
    }
    padmetRef = PadmetRef()
    if verbose:
        print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose:
        print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []

    # Close the JSON file deterministically (original leaked the handle via
    # json.load(open(rxn_file))).
    with open(rxn_file) as json_file:
        rxn_data = json.load(json_file)
    # Remove the biomass reaction; tolerate its absence instead of raising KeyError.
    rxn_data.pop("rxn12985", None)

    if verbose:
        print("updating padmet")
    for count, (rxn_id, rxn_dict) in enumerate(rxn_data.items(), start=1):
        if verbose:
            print("reaction: %s, %s/%s" % (rxn_id, count, len(rxn_data)))
        # A reaction without compounds can not be represented; report and skip.
        if not rxn_dict.get("compound_ids"):
            print(rxn_id)
            continue
        # O(1) membership test instead of building a key list per reaction.
        if rxn_id in padmetRef.dicOfNode:
            if verbose:
                print("%s already in padmet" % rxn_id)
            continue

        rxn_direction = "LEFT-TO-RIGHT" if rxn_dict["reversibility"] == ">" else "REVERSIBLE"
        padmetRef.createNode("reaction", rxn_id, {
            "COMMON_NAME": [rxn_dict["name"]],
            "DIRECTION": [rxn_direction]
        })

        for metabo_data in rxn_dict["stoichiometry"].split(";"):
            # ModelSEED escapes '"' inside names as '???'; restore before parsing.
            metabo_data = metabo_data.replace("???", "\"")
            try:
                # Quoted-name form: stoich:id:compart:"name"
                metabo_temp, metabo_name = metabo_data.split("\"")[:2]
                metabo_stoich, metabo_id, metabo_compart = metabo_temp.split(":")[:3]
            except ValueError:
                # Plain form: stoich:id:compart:name
                metabo_stoich, metabo_id, metabo_compart, metabo_name = metabo_data.split(":")[:4]
            metabo_stoich = float(metabo_stoich)
            # Compartment indices, from the ModelSEED github documentation.
            metabo_compart = {"0": "c", "1": "e", "2": "p"}.get(metabo_compart, metabo_compart)

            if metabo_id not in padmetRef.dicOfNode:
                padmetRef.createNode("compound", metabo_id, {"COMMON_NAME": [metabo_name]})

            # Negative stoichiometry = substrate, positive = product.
            rlt_type = "consumes" if metabo_stoich < 0 else "produces"
            list_of_relation.append(Relation(rxn_id, rlt_type, metabo_id, {
                "STOICHIOMETRY": [abs(metabo_stoich)],
                "COMPARTMENT": [metabo_compart]
            }))

    with open(pwy_file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        pwy_raw_data = list(reader)
    for pwy_raw in pwy_raw_data:
        pwy_id = pwy_raw["Source ID"]
        pwy_names = [pwy_raw["Name"], pwy_raw["Aliases"]]
        rxn_ids = pwy_raw["Reactions"].split("|")
        # Relations are only added for pathways seen for the first time,
        # matching the original nesting.
        if pwy_id not in padmetRef.dicOfNode:
            padmetRef.createNode("pathway", pwy_id, {"COMMON_NAME": pwy_names})
            for rxn_id in rxn_ids:
                list_of_relation.append(Relation(rxn_id, "is_in_pathway", pwy_id))

    if verbose:
        print("Adding all relations")
    for count, rlt in enumerate(list_of_relation, start=1):
        if verbose:
            print("relation %s/%s" % (count, len(list_of_relation)))
        padmetRef.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmetRef.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)

    if verbose:
        print("Generating file: %s" % output)
    padmetRef.generateFile(output)